From 071e6175dcc130b4c623e849a380d6434289eb66 Mon Sep 17 00:00:00 2001 From: Erik Smistad Date: Thu, 24 May 2018 15:47:00 +0200 Subject: [PATCH 001/411] Added the -Thost=x64 flag to cmake build instructions --- tensorflow/contrib/cmake/README.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 0b79f718d4..5c203b777c 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -106,17 +106,6 @@ Step-by-step Windows build 1. Install the prerequisites detailed above, and set up your environment. - * The following commands assume that you are using the Windows Command - Prompt (`cmd.exe`). You will need to set up your environment to use the - appropriate toolchain, i.e. the 64-bit tools. (Some of the binary targets - we will build are too large for the 32-bit tools, and they will fail with - out-of-memory errors.) The typical command to do set up your - environment is: - - ``` - D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" - ``` - * When building with GPU support after installing the CUDNN zip file from NVidia, append its bin directory to your PATH environment variable. In case TensorFlow fails to find the CUDA dll's during initialization, check your PATH environment variable. @@ -168,7 +157,7 @@ Step-by-step Windows build and must be the last character on each line. ``` - D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^ + D:\...\build> cmake .. -A x64 -Thost=x64 -DCMAKE_BUILD_TYPE=Release ^ More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^ More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^ More? 
-DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib @@ -197,6 +186,10 @@ Step-by-step Windows build not currently supported, because it relies on a `Debug` library for Python (`python35d.lib`) that is not distributed by default. + The `-Thost=x64` flag will ensure that the 64 bit compiler and linker + is used when building. Without this flag, MSBuild will use the 32 bit + toolchain which is prone to compile errors such as "compiler out of heap space". + There are various options that can be specified when generating the solution and project files: @@ -263,6 +256,11 @@ Step-by-step Windows build 4. Invoke MSBuild to build TensorFlow. + Set up the path to find MSbuild: + ``` + D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" + ``` + To build the C++ example program, which will be created as a `.exe` executable in the subdirectory `.\Release`: -- GitLab From 6890731b2693f6b71dedaca6b2eaf8b488226836 Mon Sep 17 00:00:00 2001 From: Erik Smistad Date: Thu, 24 May 2018 15:47:22 +0200 Subject: [PATCH 002/411] increase minimum cmake version required to 3.8 --- tensorflow/contrib/cmake/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 0708d6b7b9..225c5e6227 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -1,5 +1,9 @@ # Minimum CMake required -cmake_minimum_required(VERSION 3.5) +if(WIN32) + cmake_minimum_required(VERSION 3.8) +else() + cmake_minimum_required(VERSION 3.5) +endif() # Project project(tensorflow C CXX) -- GitLab From 537bd0d8237d77c789c1b7633d8ba4b68007f52e Mon Sep 17 00:00:00 2001 From: Andy Craze Date: Sun, 12 Aug 2018 14:40:04 -0700 Subject: [PATCH 003/411] Update Nesterov implementation docs Clarification that this is a modified version of the algorithm which is only correct under certain conditions Fixes 
#19899 --- tensorflow/python/training/momentum.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/training/momentum.py b/tensorflow/python/training/momentum.py index cb3ec6f053..34c74cda4e 100644 --- a/tensorflow/python/training/momentum.py +++ b/tensorflow/python/training/momentum.py @@ -59,6 +59,10 @@ class MomentumOptimizer(optimizer.Optimizer): This implementation always computes gradients at the value of the variable(s) passed to the optimizer. Using Nesterov Momentum makes the variable(s) track the values called `theta_t + mu*v_t` in the paper. + This implementation is an approximation of the original formula, valid + for high values of momentum. It will compute the "adjusted gradient" in NAG + by assuming that the new gradient will be estimated by the current + average gradient plus the product of momentum and the change in the average gradient. @compatibility(eager) When eager execution is enabled, `learning_rate` and `momentum` can each be -- GitLab From aa25cc078c9b55e5ca3e0f59df43e169bfee8f3c Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Thu, 16 Aug 2018 19:04:37 +0800 Subject: [PATCH 004/411] Add LeakyRelu C++ Op and its gradient implementation. LeakyRelu, defined as 'y = { x (x>=0) or alpha*x (x<0) }', was computed by combined Ops 'max(x, alpha*x)' in the current code. Hence its gradient calculation for back propagation would contain a series of element-wise Ops. This looks really unnecessary for such a simple op and it could be done within just one Op with fewer memory accesses. 
--- tensorflow/cc/gradients/nn_grad.cc | 13 ++ tensorflow/cc/gradients/nn_grad_test.cc | 13 ++ tensorflow/core/kernels/relu_op.cc | 153 +++++++++++------- tensorflow/core/kernels/relu_op.h | 59 +++++++ tensorflow/core/kernels/relu_op_functor.h | 31 ++++ tensorflow/core/kernels/relu_op_gpu.cu.cc | 18 ++- tensorflow/core/ops/nn_ops.cc | 15 ++ tensorflow/core/ops/ops.pbtxt | 68 ++++++++ tensorflow/python/eager/pywrap_tfe_src.cc | 2 + .../python/kernel_tests/relu_op_test.py | 113 +++++++++++++ tensorflow/python/ops/nn_grad.py | 15 ++ tensorflow/python/ops/nn_ops.py | 3 +- 12 files changed, 432 insertions(+), 71 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 588e96cb19..0fc23d0bf7 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -143,6 +143,19 @@ Status Relu6GradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Relu6", Relu6GradHelper); +Status LeakyReluGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + attrs.Alpha(alpha); + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper); + Status EluGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index aa72cf7ba2..5ebece7b6e 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -41,6 +41,7 @@ using ops::MaxPoolV2; using ops::Placeholder; using ops::Relu; using ops::Relu6; +using ops::LeakyRelu; using ops::Selu; using ops::Softmax; using ops::Softplus; @@ -160,6 +161,18 @@ 
TEST_F(NNGradTest, Relu6Grad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, LeakyReluGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = LeakyRelu(scope_, x); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). + Tensor x_init_value = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + RunTest(x, x_init_value, y, shape); +} + TEST_F(NNGradTest, EluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc index d52358737f..c4f2ef5632 100644 --- a/tensorflow/core/kernels/relu_op.cc +++ b/tensorflow/core/kernels/relu_op.cc @@ -33,19 +33,25 @@ typedef Eigen::GpuDevice GPUDevice; typedef Eigen::SyclDevice SYCLDevice; #endif // TENSORFLOW_USE_SYCL -#define REGISTER_RELU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_CPU).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_CPU).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint("T"), \ - Relu6GradOp) +#define REGISTER_RELU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_CPU).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_CPU).TypeConstraint("T"), \ + Relu6Op); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint("T"), \ + Relu6GradOp) \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + 
Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + LeakyReluGradOp); TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS); #undef REGISTER_RELU_KERNELS @@ -99,6 +105,19 @@ namespace functor { extern template struct Relu6Grad; \ \ template <> \ + void LeakyRelu::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor features, \ + T alpha, typename TTypes::Tensor activations); \ + extern template struct LeakyRelu; \ + \ + template <> \ + void LeakyReluGrad::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor gradients, \ + typename TTypes::ConstTensor features, \ + T alpha, typename TTypes::Tensor backprops); \ + extern template struct LeakyReluGrad; \ + \ + template <> \ void Elu::operator()(const GPUDevice& d, \ typename TTypes::ConstTensor features, \ typename TTypes::Tensor activations); \ @@ -128,30 +147,36 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); } // namespace functor // Registration of the GPU implementations. -#define REGISTER_GPU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_GPU).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_GPU).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint("T"), \ - Relu6GradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Elu").Device(DEVICE_GPU).TypeConstraint("T"), \ - EluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("EluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ - EluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Selu").Device(DEVICE_GPU).TypeConstraint("T"), \ - SeluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ +#define REGISTER_GPU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_GPU).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + 
Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_GPU).TypeConstraint("T"), \ + Relu6Op); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint("T"), \ + Relu6GradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + LeakyReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Elu").Device(DEVICE_GPU).TypeConstraint("T"), \ + EluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("EluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + EluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Selu").Device(DEVICE_GPU).TypeConstraint("T"), \ + SeluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ SeluGradOp) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); @@ -161,30 +186,36 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #ifdef TENSORFLOW_USE_SYCL // Registration of the GPU implementations. 
-#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_SYCL).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - Relu6GradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Elu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Selu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - SeluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ +#define REGISTER_SYCL_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_SYCL).TypeConstraint("T"), \ + Relu6Op); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + Relu6GradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + LeakyReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Elu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + EluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + EluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Selu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + SeluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ SeluGradOp) 
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS); diff --git a/tensorflow/core/kernels/relu_op.h b/tensorflow/core/kernels/relu_op.h index e712b02bd7..c55190065c 100644 --- a/tensorflow/core/kernels/relu_op.h +++ b/tensorflow/core/kernels/relu_op.h @@ -131,6 +131,65 @@ void Relu6GradOp::OperateNoTemplate(OpKernelContext* context, output->flat()); } +template +class LeakyReluOp : public UnaryElementWiseOp> { + public: + explicit LeakyReluOp(OpKernelConstruction* context) + : UnaryElementWiseOp>(context) { + float alpha_tmp; + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp)); + alpha_ = T(alpha_tmp); + } + + void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { + functor::LeakyRelu functor; + functor(context->eigen_device(), input.flat(), + alpha_, output->flat()); + } + + private: + T alpha_; +}; + +template +class LeakyReluGradOp + : public BinaryElementWiseOp> { + public: + explicit LeakyReluGradOp(OpKernelConstruction* context) + : BinaryElementWiseOp>(context) { + float alpha_tmp; + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp)); + alpha_ = T(alpha_tmp); + } + + void OperateNoTemplate(OpKernelContext* context, const Tensor& g, + const Tensor& a, T alpha, Tensor* output); + + // INPUTS: + // g (gradients): backpropagated gradients + // a (inputs): either the inputs that were passed to LeakyReluOp(), or its + // outputs (using either one yields the same result here). 
+ // OUTPUT: + // gradients to backprop + template + void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a, + Tensor* output) { + OperateNoTemplate(context, g, a, alpha_, output); + } + + private: + T alpha_; +}; + +template +void LeakyReluGradOp::OperateNoTemplate(OpKernelContext* context, + const Tensor& g, const Tensor& a, T alpha, Tensor* output) { + if (!ReluHelpers::ValidateSameSize(context, g, a)) return; + functor::LeakyReluGrad functor; + functor(context->eigen_device(), g.flat(), a.flat(), alpha, + output->flat()); +}; + template class EluOp : public UnaryElementWiseOp> { public: diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h index 3bc5ba8a50..7f0951451d 100644 --- a/tensorflow/core/kernels/relu_op_functor.h +++ b/tensorflow/core/kernels/relu_op_functor.h @@ -91,6 +91,37 @@ struct Relu6Grad { } }; + +// Functor used by LeakyReluOp to do the computations. +template +struct LeakyRelu { + // Computes LeakyRelu activation. + // + // features: any shape. + // activations: same shape as "features". + void operator()(const Device& d, typename TTypes::ConstTensor features, + T alpha, typename TTypes::Tensor activations) { + activations.device(d) = features.cwiseMax(features * alpha); + } +}; + +// Functor used by LeakyReluGradOp to do the computations. +template +struct LeakyReluGrad { + // Computes LeakyReluGrad backprops. + // + // gradients: gradients backpropagated to the LeakyRelu op. + // features: either the inputs that were passed to the LeakyRelu or, or its + // outputs (using either one yields the same result here). + // backprops: gradients to backpropagate to the LeakyRelu inputs. 
+ void operator()(const Device& d, typename TTypes::ConstTensor gradients, + typename TTypes::ConstTensor features, T alpha, + typename TTypes::Tensor backprops) { + backprops.device(d) = + (features > static_cast(0)).select(gradients, gradients * alpha); + } +}; + // Functor used by EluOp to do the computations. template struct Elu { diff --git a/tensorflow/core/kernels/relu_op_gpu.cu.cc b/tensorflow/core/kernels/relu_op_gpu.cu.cc index 089ca8ed27..4452f4dcc9 100644 --- a/tensorflow/core/kernels/relu_op_gpu.cu.cc +++ b/tensorflow/core/kernels/relu_op_gpu.cu.cc @@ -114,14 +114,16 @@ struct ReluGrad { } // namespace functor // Definition of the GPU implementations declared in relu_op.cc. -#define DEFINE_GPU_KERNELS(T) \ - template struct functor::Relu; \ - template struct functor::ReluGrad; \ - template struct functor::Relu6; \ - template struct functor::Relu6Grad; \ - template struct functor::Elu; \ - template struct functor::EluGrad; \ - template struct functor::Selu; \ +#define DEFINE_GPU_KERNELS(T) \ + template struct functor::Relu; \ + template struct functor::ReluGrad; \ + template struct functor::Relu6; \ + template struct functor::Relu6Grad; \ + template struct functor::LeakyRelu; \ + template struct functor::LeakyReluGrad; \ + template struct functor::Elu; \ + template struct functor::EluGrad; \ + template struct functor::Selu; \ template struct functor::SeluGrad; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index e0f25fb4ef..023f988f80 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -983,6 +983,21 @@ REGISTER_OP("Relu6Grad") .Attr("T: realnumbertype") .SetShapeFn(shape_inference::MergeBothInputsShapeFn); +REGISTER_OP("LeakyRelu") + .Input("features: T") + .Output("activations: T") + .Attr("alpha: float = 0.2") + .Attr("T: {half, float, double} = DT_FLOAT") + .SetShapeFn(shape_inference::UnchangedShape); + +REGISTER_OP("LeakyReluGrad") + 
.Input("gradients: T") + .Input("features: T") + .Output("backprops: T") + .Attr("alpha: float = 0.2") + .Attr("T: {half, float, double} = DT_FLOAT") + .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + REGISTER_OP("Elu") .Input("features: T") .Output("activations: T") diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index f2595279e0..837e91bc23 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -13604,6 +13604,74 @@ op { minimum: 1 } } +op { + name: "LeakyRelu" + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "activations" + type_attr: "T" + } + attr { + name: "alpha" + type: "float" + default_value { + f: 0.2 + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} +op { + name: "LeakykReluGrad" + input_arg { + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "backprops" + type_attr: "T" + } + attr { + name: "alpha" + type: "float" + default_value { + f: 0.2 + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} op { name: "LearnedUnigramCandidateSampler" input_arg { diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 2d54555cd3..9b3b5fd7aa 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) { "SoftplusGrad", "Softsign", "ReluGrad", + "LeakyReluGrad", "Conv2D", "DepthwiseConv2dNative", "Dilation2D", @@ -1799,6 +1800,7 @@ bool OpDoesntRequireInput(const string& op_name) { "BiasAdd", "Relu", "Relu6", + "LeakyRelu", "Elu", "Selu", "SparseSoftmaxCrossEntropyWithLogits", diff --git 
a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 25e947f09e..ccb3a231bb 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -252,6 +252,119 @@ class Relu6Test(test.TestCase): self.assertLess(err, 1e-10) +class LeakyReluTest(test.TestCase): + + def _npLeakyRelu(self, np_features, alpha=0.1): + return np.maximum(np_features, alpha * np_features) + + def testNpLeakyRelu(self): + self.assertAllClose( + np.array([[-0.09, 0.7, -0.05, 0.3, -0.01], + [0.1, -0.03, 0.5, -0.07, 0.9]]), + self._npLeakyRelu( + np.array([[-0.9, 0.7, -0.5, 0.3, -0.1], [0.1, -0.3, 0.5, -0.7, 0.9] + ]), alpha=0.1)) + + def _testLeakyRelu(self, np_features, alpha, use_gpu=False): + np_leaky_relu = self._npLeakyRelu(np_features, alpha) + with self.test_session(use_gpu=use_gpu): + leaky_relu = nn_ops.leaky_relu(np_features, alpha) + tf_leaky_relu = leaky_relu.eval() + self.assertAllClose(np_leaky_relu, tf_leaky_relu) + self.assertShapeEqual(np_leaky_relu, leaky_relu) + + def testNumbers(self): + for t in [np.int32, np.int64, np.float16, np.float32, np.float64]: + self._testLeakyRelu( + np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), + alpha=0.2, use_gpu=False) + if t in [np.float16, np.float32, np.float64]: + self._testLeakyRelu( + np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), + alpha=0.1, use_gpu=True) + + # The gradient test for ReLU is a bit tricky as the derivative is not well + # defined at around zero and we want to avoid that in terms of input values. 
+ def testGradientFloat32(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], y, [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient err = ", err) + self.assertLess(err, 1e-4) + + def testGradientFloat64(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.2, name="leaky_relu") + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], y, [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient err = ", err) + self.assertLess(err, 1e-10) + + def testGradGradFloat32(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient of gradient err = ", err) + self.assertLess(err, 1e-4) + + def testGradGradFloat64(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, 
-0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient of gradient err = ", err) + self.assertLess(err, 1e-10) + + def testGradientScalar(self): + with self.test_session() as sess: + x = variables.Variable(-100.) + y = nn_ops.leaky_relu(x, 0.05) + loss = y**2 + optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.2) + train_op = optimizer.minimize(loss) + sess.run(variables.global_variables_initializer()) + sess.run(train_op) + self.assertAllClose(x.eval(), -99.9) + + class EluTest(test.TestCase): def _npElu(self, np_features): diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index df23ac55ce..c2dd58bdf0 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -390,6 +390,21 @@ def _Relu6GradGrad(op, grad): array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) +@ops.RegisterGradient("LeakyRelu") +def _LeakyReluGrad(op, grad): + x = op.inputs[0] + alpha = op.get_attr("alpha") + return gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha) + + +@ops.RegisterGradient("LeakyReluGrad") +def _LeakyReluGradGrad(op, grad): + x = op.inputs[1] + alpha = op.get_attr("alpha") + return (gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha), + array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) + + @ops.RegisterGradient("Elu") def _EluGrad(op, grad): return gen_nn_ops.elu_grad(grad, op.outputs[0]) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 6fd1273687..31b8f3945d 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1601,8 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) - alpha = ops.convert_to_tensor(alpha, 
dtype=features.dtype, name="alpha") - return math_ops.maximum(alpha * features, features, name=name) + return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) def _flatten_outer_dims(logits): -- GitLab From cb5c61a3e11a37fb39a246aaf8ed6d02dd9ae9ab Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Fri, 24 Aug 2018 11:51:34 +0800 Subject: [PATCH 005/411] Refine LeakyRelu codes and update APIs. --- .../api_def/base_api/api_def_LeakyRelu.pbtxt | 4 ++++ .../base_api/api_def_LeakyReluGrad.pbtxt | 24 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 2 +- tensorflow/python/eager/pywrap_tfe_src.cc | 2 +- 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt new file mode 100644 index 0000000000..4a61889f54 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "LeakyRelu" + summary: "Computes rectified linear: `max(features, features * alpha)`." +} diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt new file mode 100644 index 0000000000..e427526602 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt @@ -0,0 +1,24 @@ +op { + graph_op_name: "LeakyReluGrad" + visibility: HIDDEN + in_arg { + name: "gradients" + description: < 0) + alpha * gradients * (features <= 0)`. +END + } + summary: "Computes rectified linear gradients for a LeakyRelu operation." 
+} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 837e91bc23..7693c2d485 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -13637,7 +13637,7 @@ op { } } op { - name: "LeakykReluGrad" + name: "LeakyReluGrad" input_arg { name: "gradients" type_attr: "T" diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 9b3b5fd7aa..18fafd0de1 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) { "SoftplusGrad", "Softsign", "ReluGrad", + "LeakyRelu", "LeakyReluGrad", "Conv2D", "DepthwiseConv2dNative", @@ -1800,7 +1801,6 @@ bool OpDoesntRequireInput(const string& op_name) { "BiasAdd", "Relu", "Relu6", - "LeakyRelu", "Elu", "Selu", "SparseSoftmaxCrossEntropyWithLogits", -- GitLab From 877358f68fcfd3ca06fdec87007e0cc90502f202 Mon Sep 17 00:00:00 2001 From: David Norman Date: Fri, 24 Aug 2018 17:13:53 +0100 Subject: [PATCH 006/411] Allow for disabling these tests via manifest --- tensorflow/compiler/xla/tests/while_test.cc | 46 ++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index 1bdf1867b9..e6c69a5a86 100644 --- a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -48,7 +48,7 @@ class WhileTest : public ClientLibraryTestBase {}; // while (result < 5) { // result = result + 1; // } -TEST_F(WhileTest, WhileWithScalarS32Result) { +XLA_TEST_F(WhileTest, WhileWithScalarS32Result) { auto result_shape = ShapeUtil::MakeShape(S32, {}); // Create a computation for the condition: repeat for 5 iterations. 
@@ -84,7 +84,7 @@ TEST_F(WhileTest, WhileWithScalarS32Result) { // while (result < 5) { // result = result + 1; // } -TEST_F(WhileTest, WhileWithScalarS64Result) { +XLA_TEST_F(WhileTest, WhileWithScalarS64Result) { auto result_shape = ShapeUtil::MakeShape(S64, {}); // Create a computation for the condition: repeat for 5 iterations. @@ -114,7 +114,7 @@ TEST_F(WhileTest, WhileWithScalarS64Result) { ComputeAndCompareR0(&builder, 5, {}); } -TEST_F(WhileTest, WhileWithScalarResultNonConstInit) { +XLA_TEST_F(WhileTest, WhileWithScalarResultNonConstInit) { auto result_shape = ShapeUtil::MakeShape(S32, {}); auto orig_shape = ShapeUtil::MakeShape(S32, {2}); @@ -147,7 +147,7 @@ TEST_F(WhileTest, WhileWithScalarResultNonConstInit) { ComputeAndCompareR0(&builder, 5, {}); } -TEST_F(WhileTest, WhileWithPredicateResult) { +XLA_TEST_F(WhileTest, WhileWithPredicateResult) { auto result_shape = ShapeUtil::MakeShape(PRED, {}); // Create a computation for the condition: run until condition is true. @@ -184,7 +184,7 @@ TEST_F(WhileTest, WhileWithPredicateResult) { // while (result.sum() < 15.5f) { // result = result + vector(0); // } -TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) { +XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) { Shape result_shape = ShapeUtil::MakeShape(F32, {0}); // Create a computation for the reduction. @@ -238,7 +238,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) { // while (result.sum() < 15.5f) { // result = result + vector(8, 0.125f); // } -TEST_F(WhileTest, WhileWithVectorResult) { +XLA_TEST_F(WhileTest, WhileWithVectorResult) { Shape result_shape = ShapeUtil::MakeShape(F32, {8}); // Create a computation for the reduction. 
@@ -298,7 +298,7 @@ TEST_F(WhileTest, WhileWithVectorResult) { // result = result + vector(8, 0.125f); // } // tuple = tuple { while } -TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { +XLA_TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { Shape result_shape = ShapeUtil::MakeShape(F32, {8}); // Create a computation for the reduction. @@ -353,7 +353,7 @@ TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001)); } -TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { +XLA_TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { std::vector shape_elements = { ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})}; @@ -408,7 +408,7 @@ TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001)); } -TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { +XLA_TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { std::vector shape_elements = { ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})}; @@ -466,7 +466,7 @@ TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { // get<0>(result) = get<0>(result) + 1; // get<1>(result) = get<1>(result) + vector(10, 1.0f); // } -TEST_F(WhileTest, WhileWithTupleResult) { +XLA_TEST_F(WhileTest, WhileWithTupleResult) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -516,7 +516,7 @@ TEST_F(WhileTest, WhileWithTupleResult) { ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001)); } -TEST_F(WhileTest, WhileWithPredicateTupleResult) { +XLA_TEST_F(WhileTest, WhileWithPredicateTupleResult) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(PRED, {})}; Shape result_shape = 
ShapeUtil::MakeTupleShape(shape_elements); @@ -562,7 +562,7 @@ TEST_F(WhileTest, WhileWithPredicateTupleResult) { ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0)); } -TEST_F(WhileTest, WhileWithTupleConstantScalarResult) { +XLA_TEST_F(WhileTest, WhileWithTupleConstantScalarResult) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(S32, {})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -622,7 +622,7 @@ TEST_F(WhileTest, WhileWithTupleConstantScalarResult) { // get<1>(w1) = get<1>(w1) + vector(10, 1.0f); // } // result = get<1>(w0) + get<1>(w1) -TEST_F(WhileTest, TwoWhileWithTupleResult) { +XLA_TEST_F(WhileTest, TwoWhileWithTupleResult) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -701,7 +701,7 @@ TEST_F(WhileTest, TwoWhileWithTupleResult) { } // Test while nodes that share the while body computation. -TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { +XLA_TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -768,7 +768,7 @@ TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { // Test while nodes that share the while body computation. // TODO(b/37245345): Fails on GPU backend. -TEST_F(WhileTest, DISABLED_ON_GPU(WhileLoopsWithSharedBodyAndInit)) { +XLA_TEST_F(WhileTest, DISABLED_ON_GPU(WhileLoopsWithSharedBodyAndInit)) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -907,7 +907,7 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // Per backend the values generated can be different as the different backends // use different random number generators. // TODO(b/32240857): Extend test to verify outputs. 
-TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { +XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { auto v6s32 = ShapeUtil::MakeShape(S32, {6}); // Create a computation for the condition: repeat for count iterations. @@ -953,7 +953,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { } } -TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) { +XLA_TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) { auto element_shape = ShapeUtil::MakeShape(F32, {2}); XlaBuilder outer("outer"); @@ -985,7 +985,7 @@ TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) { ErrorSpec(1e-6)); } -TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) { +XLA_TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) { auto element_shape = ShapeUtil::MakeShape(F32, {2}); XlaBuilder outer("outer"); @@ -1010,7 +1010,7 @@ TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) { ErrorSpec(1e-6)); } -TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) { +XLA_TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) { auto element_shape = ShapeUtil::MakeShape(F32, {}); XlaBuilder outer("outer"); @@ -1044,7 +1044,7 @@ TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) { // result[0] = result[0] + 1; // result[1] = result[1] + 1; // } -TEST_F(WhileTest, WhileWithMixedTupleElements) { +XLA_TEST_F(WhileTest, WhileWithMixedTupleElements) { auto result_shape = ShapeUtil::MakeTupleShape( {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(S32, {})}); @@ -1152,7 +1152,7 @@ XLA_TEST_F(WhileTest, NestedWhileWithScalarResult) { // while (f(result).get<0>()) { // result = result + 1; // } -TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { +XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { auto result_shape = ShapeUtil::MakeShape(S32, {}); // Create a computation for the condition: repeat for 5 iterations. 
@@ -1192,7 +1192,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { ComputeAndCompareR0(&builder, 5, {}); } -TEST_F(WhileTest, WhileWithLoopInvariantOperation) { +XLA_TEST_F(WhileTest, WhileWithLoopInvariantOperation) { auto matrix_shape = ShapeUtil::MakeShape(F32, {2, 2}); auto scalar_s32 = ShapeUtil::MakeShape(S32, {}); auto while_shape = ShapeUtil::MakeTupleShape( @@ -1236,7 +1236,7 @@ TEST_F(WhileTest, WhileWithLoopInvariantOperation) { {param_value.get()}, ErrorSpec(4e-5)); } -TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileInfeedCondition)) { +XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileInfeedCondition)) { auto while_shape = ShapeUtil::MakeShape(S32, {}); XlaComputation condition; -- GitLab From 4e72dd865a3fc83baa69f6b7c08720a1b546a464 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 29 Aug 2018 17:05:43 +0800 Subject: [PATCH 007/411] Refine LeakyRelu codes. 1. Add C++ gradient of gradient definition of LeakyReLu and revalant UT. 2. Using forward compatibility layer for python code changes. 
--- tensorflow/cc/gradients/nn_grad.cc | 18 ++++- tensorflow/cc/gradients/nn_grad_test.cc | 16 +++++ .../python/kernel_tests/relu_op_test.py | 70 ++++++++++--------- tensorflow/python/ops/nn_ops.py | 5 +- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 0fc23d0bf7..2a32a2ed6f 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -149,13 +149,27 @@ Status LeakyReluGradHelper(const Scope& scope, const Operation& op, float alpha; TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); internal::LeakyReluGrad::Attrs attrs; - attrs.Alpha(alpha); - auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs); + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), + attrs.Alpha(alpha)); grad_outputs->push_back(dx); return scope.status(); } REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper); +Status LeakyReluGradGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(1), + attrs.Alpha(alpha)); + grad_outputs->push_back(dx); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyReluGrad", LeakyReluGradGradHelper); + Status EluGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index 5ebece7b6e..bf0db1f59d 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/cc/framework/gradient_checker.h" #include "tensorflow/cc/framework/testutil.h" #include "tensorflow/cc/gradients/grad_testutil.h" +#include "tensorflow/cc/ops/nn_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -173,6 +174,21 @@ TEST_F(NNGradTest, LeakyReluGrad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, LeakyReluGradGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). + Tensor x_init_value = test::AsTensor( + {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, + {5, 2}); + Tensor features = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + auto y = ops::internal::LeakyReluGrad(scope_, x, features); + RunTest(x, x_init_value, y, shape); +} + TEST_F(NNGradTest, EluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index ccb3a231bb..7066f28883 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.compat import compat from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops @@ -283,8 +284,9 @@ class LeakyReluTest(test.TestCase): np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), alpha=0.1, use_gpu=True) - # The gradient test for ReLU is a bit tricky as the derivative is not well - # defined at around zero and we want to avoid that in 
terms of input values. + # The gradient test for Leaky ReLU is a bit tricky as the derivative is not + # well defined at around zero and we want to avoid that in terms of input + # values. def testGradientFloat32(self): with self.test_session(): x = constant_op.constant( @@ -319,39 +321,41 @@ class LeakyReluTest(test.TestCase): self.assertLess(err, 1e-10) def testGradGradFloat32(self): - with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - name="x") - y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float32, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) - print("leaky_relu (float32) gradient of gradient err = ", err) - self.assertLess(err, 1e-4) + with compat.forward_compatibility_horizon(2018, 10, 2): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient of gradient err = ", err) + self.assertLess(err, 1e-4) def testGradGradFloat64(self): - with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - dtype=dtypes.float64, - name="x") - y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float64, - order="F") - err = 
gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) - print("leaky_relu (float64) gradient of gradient err = ", err) - self.assertLess(err, 1e-10) + with compat.forward_compatibility_horizon(2018, 10, 2): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient of gradient err = ", err) + self.assertLess(err, 1e-10) def testGradientScalar(self): with self.test_session() as sess: diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 31b8f3945d..52ea202636 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1601,7 +1601,10 @@ def leaky_relu(features, alpha=0.2, name=None): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) - return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) + if compat.forward_compatible(2018, 10, 1): + return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) + alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") + return math_ops.maximum(alpha * features, features, name=name) def _flatten_outer_dims(logits): -- GitLab From 2586eb3bfeeef3af357e438ae5aff92d2bac12a5 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Mon, 3 Sep 2018 11:48:35 +0800 Subject: [PATCH 008/411] Code fix against ci_build error results. 
--- tensorflow/cc/gradients/nn_grad_test.cc | 3 +- tensorflow/core/kernels/relu_op.cc | 8 +-- tensorflow/core/kernels/relu_op.h | 8 +-- tensorflow/core/kernels/relu_op_functor.h | 1 - .../python/kernel_tests/relu_op_test.py | 50 +++++++++---------- .../tools/api/golden/v1/tensorflow.pbtxt | 4 ++ 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index bf0db1f59d..d8c2a1a0fc 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -180,8 +180,7 @@ TEST_F(NNGradTest, LeakyReluGradGrad) { // Avoid input values where Leaky ReLU gradient is not well defined (around // zero). Tensor x_init_value = test::AsTensor( - {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, - {5, 2}); + {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, {5, 2}); Tensor features = test::AsTensor( {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, {5, 2}); diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc index c4f2ef5632..cafa49cbb6 100644 --- a/tensorflow/core/kernels/relu_op.cc +++ b/tensorflow/core/kernels/relu_op.cc @@ -106,15 +106,15 @@ namespace functor { \ template <> \ void LeakyRelu::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor features, \ - T alpha, typename TTypes::Tensor activations); \ + const GPUDevice& d, typename TTypes::ConstTensor features, T alpha, \ + typename TTypes::Tensor activations); \ extern template struct LeakyRelu; \ \ template <> \ void LeakyReluGrad::operator()( \ const GPUDevice& d, typename TTypes::ConstTensor gradients, \ - typename TTypes::ConstTensor features, \ - T alpha, typename TTypes::Tensor backprops); \ + typename TTypes::ConstTensor features, T alpha, \ + typename TTypes::Tensor backprops); \ extern template struct LeakyReluGrad; \ \ template <> \ diff --git a/tensorflow/core/kernels/relu_op.h 
b/tensorflow/core/kernels/relu_op.h index c55190065c..fa79ab03ae 100644 --- a/tensorflow/core/kernels/relu_op.h +++ b/tensorflow/core/kernels/relu_op.h @@ -143,8 +143,8 @@ class LeakyReluOp : public UnaryElementWiseOp> { void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { functor::LeakyRelu functor; - functor(context->eigen_device(), input.flat(), - alpha_, output->flat()); + functor(context->eigen_device(), input.flat(), alpha_, + output->flat()); } private: @@ -183,7 +183,9 @@ class LeakyReluGradOp template void LeakyReluGradOp::OperateNoTemplate(OpKernelContext* context, - const Tensor& g, const Tensor& a, T alpha, Tensor* output) { + const Tensor& g, + const Tensor& a, T alpha, + Tensor* output) { if (!ReluHelpers::ValidateSameSize(context, g, a)) return; functor::LeakyReluGrad functor; functor(context->eigen_device(), g.flat(), a.flat(), alpha, diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h index 7f0951451d..548d5a277d 100644 --- a/tensorflow/core/kernels/relu_op_functor.h +++ b/tensorflow/core/kernels/relu_op_functor.h @@ -91,7 +91,6 @@ struct Relu6Grad { } }; - // Functor used by LeakyReluOp to do the computations. 
template struct LeakyRelu { diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 7066f28883..3e24b8a2c4 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -323,37 +323,37 @@ class LeakyReluTest(test.TestCase): def testGradGradFloat32(self): with compat.forward_compatibility_horizon(2018, 10, 2): with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - name="x") - y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float32, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) print("leaky_relu (float32) gradient of gradient err = ", err) self.assertLess(err, 1e-4) def testGradGradFloat64(self): with compat.forward_compatibility_horizon(2018, 10, 2): with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - dtype=dtypes.float64, - name="x") - y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float64, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) + x = constant_op.constant( + [-0.9, -0.7, 
-0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) print("leaky_relu (float64) gradient of gradient err = ", err) self.assertLess(err, 1e-10) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 4de662fe33..9e8d320f06 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1324,6 +1324,10 @@ tf_module { name: "lbeta" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "leaky_relu" + argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], " + } member_method { name: "less" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Mon, 3 Sep 2018 12:10:51 +0800 Subject: [PATCH 009/411] Add XLA support for LeakyReluOp. 
Code contributed by: Meng Chen --- tensorflow/compiler/tests/binary_ops_test.py | 7 ++++ tensorflow/compiler/tests/unary_ops_test.py | 5 +++ tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 0aafda7fb4..8941dd4e27 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -178,6 +178,13 @@ class BinaryOpsTest(xla_test.XLATestCase): [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) + self._testBinary( + gen_nn_ops._leaky_relu_grad, + np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), + np.array( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype), + expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype)) + self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype), diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 73adb0d243..91f876fa23 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -361,6 +361,11 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([[-0.05, 6.05, 5]], dtype=dtype), expected=np.array([[0, 6, 5]], dtype=dtype)) + self._assertOpOutputMatchesExpected( + nn_ops.leaky_relu, + np.array([[-1.0, 1.0]], dtype=dtype), + expected=np.array([[-0.2, 1.0]], dtype=dtype)) + self._assertOpOutputMatchesExpected( nn_ops.softmax, np.array([1, 2, 3, 4], dtype=dtype), diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index d35777ccb1..ec14735884 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,6 +50,24 @@ class Relu6Op : public 
XlaOpKernel { } }; + +class LeakyReluOp : public XlaOpKernel { + public: + explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); + } + // Compute the max of the input x and alpha*x. + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* builder = ctx->builder(); + auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), + static_cast(alpha_)); + ctx->SetOutput(0, + xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); + } + private: + float alpha_; +}; + class ReluGradOp : public XlaOpKernel { public: explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} @@ -84,10 +102,34 @@ class Relu6GradOp : public XlaOpKernel { } }; +class LeakyReluGradOp : public XlaOpKernel { + public: + explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); + } + // Return the lhs (incoming gradient) if the rhs (input feature) > 0, + // otherwise return the alpha * lhs. 
+ void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* b = ctx->builder(); + const TensorShape shape = ctx->InputShape(0); + const auto zero = + xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); + const auto pred = xla::Gt(ctx->Input(1), zero); + auto alpha = XlaHelpers::FloatLiteral(b, input_type(0), + static_cast(alpha_)); + ctx->SetOutput(0, + xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); + } + private: + float alpha_; +}; + REGISTER_XLA_OP(Name("Relu"), ReluOp); REGISTER_XLA_OP(Name("Relu6"), Relu6Op); +REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp); REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp); REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp); +REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp); } // namespace } // namespace tensorflow -- GitLab From a95281ce1b449d8f92a3799ff9c1dbf661b70bc4 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 5 Sep 2018 09:02:40 +0800 Subject: [PATCH 010/411] Avoid golden API file changing. --- tensorflow/cc/gradients/nn_grad_test.cc | 3 +-- tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt | 1 + tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index d8c2a1a0fc..f5a09e09dc 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -42,7 +42,6 @@ using ops::MaxPoolV2; using ops::Placeholder; using ops::Relu; using ops::Relu6; -using ops::LeakyRelu; using ops::Selu; using ops::Softmax; using ops::Softplus; @@ -165,7 +164,7 @@ TEST_F(NNGradTest, Relu6Grad) { TEST_F(NNGradTest, LeakyReluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); - auto y = LeakyRelu(scope_, x); + auto y = ops::internal::LeakyRelu(scope_, x); // Avoid input values where Leaky ReLU gradient is not well defined (around // zero). 
Tensor x_init_value = test::AsTensor( diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt index 4a61889f54..280148e032 100644 --- a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt @@ -1,4 +1,5 @@ op { graph_op_name: "LeakyRelu" + visibility: HIDDEN summary: "Computes rectified linear: `max(features, features * alpha)`." } diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 9e8d320f06..4de662fe33 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1324,10 +1324,6 @@ tf_module { name: "lbeta" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "leaky_relu" - argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], " - } member_method { name: "less" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 90cf7fb7786c8a9c135ef73482856b082e80f61a Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Tue, 11 Sep 2018 12:48:30 +0800 Subject: [PATCH 011/411] Fix lint errors and typos. 
--- tensorflow/compiler/tests/binary_ops_test.py | 9 +++++---- tensorflow/compiler/tf2xla/kernels/relu_op.cc | 14 +++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 8941dd4e27..069e83d083 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -179,11 +179,12 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) self._testBinary( - gen_nn_ops._leaky_relu_grad, + gen_nn_ops.leaky_relu_grad, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), - np.array( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype), - expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype)) + np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + dtype=dtype), + expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], + dtype=dtype)) self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index ec14735884..8d65e0339c 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,7 +50,6 @@ class Relu6Op : public XlaOpKernel { } }; - class LeakyReluOp : public XlaOpKernel { public: explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { @@ -61,9 +60,9 @@ class LeakyReluOp : public XlaOpKernel { xla::XlaBuilder* builder = ctx->builder(); auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), static_cast(alpha_)); - ctx->SetOutput(0, - xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); + ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); } + private: float alpha_; }; @@ -115,11 +114,12 @@ class LeakyReluGradOp : public XlaOpKernel { const auto zero = xla::Broadcast(XlaHelpers::Zero(b, 
input_type(0)), shape.dim_sizes()); const auto pred = xla::Gt(ctx->Input(1), zero); - auto alpha = XlaHelpers::FloatLiteral(b, input_type(0), - static_cast(alpha_)); - ctx->SetOutput(0, - xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); + auto alpha = + XlaHelpers::FloatLiteral(b, input_type(0), static_cast(alpha_)); + ctx->SetOutput( + 0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); } + private: float alpha_; }; -- GitLab From 5e9a9547f907599f6954fc5e28b7a78acf3b54eb Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 12 Sep 2018 11:02:12 +0800 Subject: [PATCH 012/411] Revert "Add XLA support for LeakyReluOp." This reverts commit d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74. Since bfloat16 was not supported by LeakyRelu, but it should be supported in XLA Ops. --- tensorflow/compiler/tests/binary_ops_test.py | 8 ---- tensorflow/compiler/tests/unary_ops_test.py | 5 --- tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 ------------------- 3 files changed, 55 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index c478ff4eea..17280e445b 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -178,14 +178,6 @@ class BinaryOpsTest(xla_test.XLATestCase): [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) - self._testBinary( - gen_nn_ops.leaky_relu_grad, - np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), - np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - dtype=dtype), - expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], - dtype=dtype)) - self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype), diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index dd29ef34ce..5b0e57f83f 100644 --- 
a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -361,11 +361,6 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([[-0.05, 6.05, 5]], dtype=dtype), expected=np.array([[0, 6, 5]], dtype=dtype)) - self._assertOpOutputMatchesExpected( - nn_ops.leaky_relu, - np.array([[-1.0, 1.0]], dtype=dtype), - expected=np.array([[-0.2, 1.0]], dtype=dtype)) - self._assertOpOutputMatchesExpected( nn_ops.softmax, np.array([1, 2, 3, 4], dtype=dtype), diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index 8d65e0339c..d35777ccb1 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,23 +50,6 @@ class Relu6Op : public XlaOpKernel { } }; -class LeakyReluOp : public XlaOpKernel { - public: - explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); - } - // Compute the max of the input x and alpha*x. - void Compile(XlaOpKernelContext* ctx) override { - xla::XlaBuilder* builder = ctx->builder(); - auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), - static_cast(alpha_)); - ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); - } - - private: - float alpha_; -}; - class ReluGradOp : public XlaOpKernel { public: explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} @@ -101,35 +84,10 @@ class Relu6GradOp : public XlaOpKernel { } }; -class LeakyReluGradOp : public XlaOpKernel { - public: - explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); - } - // Return the lhs (incoming gradient) if the rhs (input feature) > 0, - // otherwise return the alpha * lhs. 
- void Compile(XlaOpKernelContext* ctx) override { - xla::XlaBuilder* b = ctx->builder(); - const TensorShape shape = ctx->InputShape(0); - const auto zero = - xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); - const auto pred = xla::Gt(ctx->Input(1), zero); - auto alpha = - XlaHelpers::FloatLiteral(b, input_type(0), static_cast(alpha_)); - ctx->SetOutput( - 0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); - } - - private: - float alpha_; -}; - REGISTER_XLA_OP(Name("Relu"), ReluOp); REGISTER_XLA_OP(Name("Relu6"), Relu6Op); -REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp); REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp); REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp); -REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp); } // namespace } // namespace tensorflow -- GitLab From 78e205d35b31aa49e8dac357d827900a165f0a21 Mon Sep 17 00:00:00 2001 From: Erik Smistad Date: Thu, 20 Sep 2018 15:56:34 +0200 Subject: [PATCH 013/411] Added warning message if cmake version is below 3.8 or host toolset is not set to x64 on windows --- tensorflow/contrib/cmake/CMakeLists.txt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 225c5e6227..a7a66472df 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -1,8 +1,14 @@ # Minimum CMake required +cmake_minimum_required(VERSION 3.5) + if(WIN32) - cmake_minimum_required(VERSION 3.8) -else() - cmake_minimum_required(VERSION 3.5) + if(${CMAKE_VERSION} VERSION_LESS "3.8") + message(WARNING "Your current cmake version is ${CMAKE_VERSION} which does not support setting the toolset architecture to x64. This may cause \"compiler out of heap space\" errors when building. 
Consider upgrading your cmake to > 3.8 and using the flag -Thost=x64 when running cmake.") + else() + if(NOT CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE OR NOT "${CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE}" STREQUAL "x64") + message(WARNING "Your current cmake generator is set to use 32 bit toolset architecture. This may cause \"compiler out of heap space\" errors when building. Consider using the flag -Thost=x64 when running cmake.") + endif() + endif() endif() # Project -- GitLab From f0886f7269de900d226455d4831722f6fc94a71b Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Tue, 25 Sep 2018 09:59:17 +0800 Subject: [PATCH 014/411] Fix build dependencies in tensorflow/cc/BUILD. --- tensorflow/cc/BUILD | 1 + tensorflow/python/kernel_tests/relu_op_test.py | 4 ++-- tensorflow/python/ops/nn_ops.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index f56521dac0..e99d15f85d 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -410,6 +410,7 @@ tf_cc_test( srcs = ["gradients/nn_grad_test.cc"], deps = [ ":cc_ops", + ":cc_ops_internal", ":grad_op_registry", ":grad_testutil", ":gradient_checker", diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 86d9c90e83..d97a1613b9 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -351,7 +351,7 @@ class LeakyReluTest(test.TestCase): self.assertLess(err, 1e-10) def testGradGradFloat32(self): - with compat.forward_compatibility_horizon(2018, 10, 2): + with compat.forward_compatibility_horizon(2018, 11, 2): with self.test_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], @@ -369,7 +369,7 @@ class LeakyReluTest(test.TestCase): self.assertLess(err, 1e-4) def testGradGradFloat64(self): - with compat.forward_compatibility_horizon(2018, 10, 2): + with compat.forward_compatibility_horizon(2018, 11, 2): 
with self.test_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index d646245ce3..2861f40586 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1601,7 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) - if compat.forward_compatible(2018, 10, 1): + if compat.forward_compatible(2018, 11, 1): return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") return math_ops.maximum(alpha * features, features, name=name) -- GitLab From 96eec07af06f4dfc75cee57b74ba4b5347619634 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 26 Sep 2018 13:04:46 +0800 Subject: [PATCH 015/411] Re-add compat module for leaky_relu implementation. --- tensorflow/python/ops/nn_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 3f64f0af9a..78e000e458 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -22,6 +22,7 @@ import numbers import numpy as np +from tensorflow.python.compat import compat from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_util -- GitLab From 3c01644ed3fad31ac1b09afe31e655bd8892f02b Mon Sep 17 00:00:00 2001 From: William Irons Date: Wed, 26 Sep 2018 14:36:45 -0400 Subject: [PATCH 016/411] Artifact links for ppc64le GPU builds. whl files for tensorflow_gpu..._ppc6le are now hosted on the OSU Jenkins build server. Nightly builds and Stable Release builds are provided. 
I didn't include the version number so we won't need to update the readme file for every new release --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 57efb876c9..4f57aca99f 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,8 @@ The TensorFlow project strives to abide by generally accepted best practices in | --- | --- | --- | | **IBM s390x** | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA | | **IBM ppc64le CPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA | -| **IBM ppc64le GPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA | +| **IBM ppc64le GPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) | +| **IBM ppc64le GPU** Stable Release | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | | **Linux CPU with Intel® MKL-DNN** Nightly | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) | | **Linux CPU with Intel® MKL-DNN** Python 2.7
**Linux CPU with Intel® MKL-DNN** Python 3.5
**Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild)|[1.10.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp27-cp27mu-linux_x86_64.whl)
[1.10.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl)
[1.10.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp36-cp36m-linux_x86_64.whl) | -- GitLab From 307a095da517a7382f66e14273464c85296425aa Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Fri, 28 Sep 2018 04:59:16 +0000 Subject: [PATCH 017/411] Modify docs to conform to Python syntax --- tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt | 2 +- tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt | 2 +- tensorflow/go/op/wrappers.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt b/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt index 40c00ef58f..cd4cc5c906 100644 --- a/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt @@ -21,7 +21,7 @@ used to convert the float values to their quantized equivalents. In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: ``` -if T == qint8, in[i] += (range(T) + 1)/ 2.0 +if T == qint8: in[i] += (range(T) + 1)/ 2.0 out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) ``` here `range(T) = numeric_limits::max() - numeric_limits::min()` diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt index 37ac10dddb..b7311153f4 100644 --- a/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt @@ -42,7 +42,7 @@ In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: ``` out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -if T == qint8, out[i] -= (range(T) + 1) / 2.0 +if T == qint8: out[i] -= (range(T) + 1) / 2.0 ``` here `range(T) = numeric_limits::max() - numeric_limits::min()` diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 2f297d5161..9ec651777d 100644 --- a/tensorflow/go/op/wrappers.go +++ 
b/tensorflow/go/op/wrappers.go @@ -19500,7 +19500,7 @@ func QuantizeV2RoundMode(value string) QuantizeV2Attr { // // ``` // out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -// if T == qint8, out[i] -= (range(T) + 1) / 2.0 +// if T == qint8: out[i] -= (range(T) + 1) / 2.0 // ``` // // here `range(T) = numeric_limits::max() - numeric_limits::min()` @@ -23823,7 +23823,7 @@ func DequantizeMode(value string) DequantizeAttr { // In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: // // ``` -// if T == qint8, in[i] += (range(T) + 1)/ 2.0 +// if T == qint8: in[i] += (range(T) + 1)/ 2.0 // out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) // ``` // here `range(T) = numeric_limits::max() - numeric_limits::min()` -- GitLab From f16111286b19f4145df63b73c45be1645bde8737 Mon Sep 17 00:00:00 2001 From: Bairen Yi Date: Sat, 29 Sep 2018 22:13:09 +0800 Subject: [PATCH 018/411] Added log entries for copying unpinned memory RDMA Currently there are large number of tensors managed by non-visitable memory allocators in CPU-only PS. GPU workers seem less prone to this problem. Copying large sized tensor buffers may introduce non-trivial overhead. Should probably fix this. Signed-off-by: Bairen Yi --- tensorflow/contrib/gdr/gdr_memory_manager.cc | 156 +++++++++++-------- 1 file changed, 93 insertions(+), 63 deletions(-) diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index bb06f1c41c..3549cedb70 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -22,7 +22,6 @@ limitations under the License. #include #include #include -#include #include #include @@ -30,19 +29,17 @@ limitations under the License. 
#include #include "tensorflow/contrib/gdr/gdr.pb.h" -#include "tensorflow/core/common_runtime/bfc_allocator.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/dma_helper.h" -#include "tensorflow/core/common_runtime/pool_allocator.h" #include "tensorflow/core/common_runtime/process_state.h" #if GOOGLE_CUDA #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #endif // GOOGLE_CUDA -#include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/numa.h" namespace tensorflow { @@ -70,14 +67,11 @@ bool IsGDRAvailable() { int TryToReadNumaNode(ibv_device* device) { #if defined(__APPLE__) LOG(INFO) << "OS X does not support NUMA - returning NUMA node 0"; - return 0; + return port::kNUMANoAffinity; #elif defined(PLATFORM_WINDOWS) // Windows support for NUMA is not currently implemented. Return node 0. 
- return 0; + return port::kNUMANoAffinity; #else - VLOG(2) << "Trying to read NUMA node for device: " << device->name; - static const int kUnknownNumaNode = -1; - auto filename = string(device->ibdev_path) + "/device/numa_node"; std::ifstream ifs(filename.c_str()); @@ -91,12 +85,12 @@ int TryToReadNumaNode(ibv_device* device) { << value << "), but there must be at least one NUMA node" ", so returning NUMA node zero"; - return 0; + return port::kNUMANoAffinity; } LOG(INFO) << "NUMA node for device: " << device->name << " is " << value; return value; } - return kUnknownNumaNode; + return port::kNUMANoAffinity; #endif } @@ -138,8 +132,6 @@ class GdrMemoryManager : public RemoteMemoryManager { Device* device, DeviceContext* device_context, bool on_host, StatusCallback done) override; - static void RegMemVisitors(); - protected: Status CreateEndpoint(const string& host, const string& port, RdmaEndpointPtr& endpoint); @@ -150,7 +142,8 @@ class GdrMemoryManager : public RemoteMemoryManager { ibv_mr* FindMemoryRegion(void* addr, size_t length); - void InsertMemoryRegion(void* addr, size_t length); + void InsertMemoryRegion(void* addr, size_t length, + const std::string& allocator_name); void EvictMemoryRegion(void* addr, size_t length); @@ -160,6 +153,7 @@ class GdrMemoryManager : public RemoteMemoryManager { RdmaEndpointPtr listening_; std::atomic stopped_; int epfd_; + int numa_node_; // Server side endpoints // Accessed sequentially in Run() so not protected by lock @@ -190,46 +184,10 @@ GdrMemoryManager::GdrMemoryManager(const string& host, const string& port) port_(port), listening_(nullptr, EndpointDeleter), stopped_(true), - next_key_(0) { - static std::once_flag flag; - std::call_once(flag, []() { RegMemVisitors(); }); -} + next_key_(0) {} GdrMemoryManager::~GdrMemoryManager() { close(epfd_); } -/*static*/ void GdrMemoryManager::RegMemVisitors() { - SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - 
GdrMemoryManager::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("CPU:", numa_node)); - }; - SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes); - }; - ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); - ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); - -#if GOOGLE_CUDA - if (IsGDRAvailable()) { - int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; - - // Note we don't free allocated GPU memory so there is no free visitor - SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, - size_t num_bytes) { - RdmaMemoryMgr::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); - }; - GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, - cuda_alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, - alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); - LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; - } -#endif // GOOGLE_CUDA -} - Status GdrMemoryManager::Init() { epfd_ = epoll_create1(0); if (epfd_ == -1) { @@ -289,6 +247,42 @@ Status GdrMemoryManager::Init() { "cannot add server to epoll"); } + numa_node_ = TryToReadNumaNode(listening_->verbs->device); + + SubAllocator::Visitor alloc_visitor = [this](void* ptr, int numa_node, + size_t num_bytes) { + VLOG(2) << "Registering RDMA capable memory region on numa_node " + << numa_node; + InsertMemoryRegion(ptr, num_bytes, strings::StrCat("CPU:", numa_node)); + }; + SubAllocator::Visitor free_visitor = [this](void* ptr, int numa_node, + size_t num_bytes) { + VLOG(2) << "De-registering RDMA capable memory region on numa_node " + << numa_node; + EvictMemoryRegion(ptr, num_bytes); + }; + ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); + ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); + LOG(INFO) << 
"Instrumenting CPU allocator(s)"; + +#if GOOGLE_CUDA + if (IsGDRAvailable()) { + int bus_id = numa_node_ + 1; + + SubAllocator::Visitor cuda_alloc_visitor = [this](void* ptr, int gpu_id, + size_t num_bytes) { + VLOG(2) << "Registering RDMA capable memory region on GPU " << gpu_id; + InsertMemoryRegion(ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); + }; + GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, + cuda_alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, + alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); + LOG(INFO) << "Instrumenting GPU allocator(s) with bus_id " << bus_id; + } +#endif // GOOGLE_CUDA + return Status::OK(); } @@ -405,7 +399,7 @@ void GdrMemoryManager::TransportOptionsFromTensor( ibv_mr* mr = FindMemoryRegion(addr, length); #if GOOGLE_CUDA - if (!on_host) { + if (device->tensorflow_gpu_device_info() && !on_host) { Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0); Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape()); GPUUtil::CopyGPUTensorToCPU( @@ -456,11 +450,27 @@ void GdrMemoryManager::TransportOptionsFromTensor( #endif if (mr == nullptr) { - done(errors::Unavailable("Cannot find pinned memory region")); - return; + Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_); + Tensor host_copy(alloc, tensor.dtype(), tensor.shape()); + + std::memcpy(DMAHelper::buffer(&host_copy)->data(), buffer->data(), length); + VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer"; + + buffer = DMAHelper::buffer(&host_copy); + addr = buffer->data(); + length = buffer->size(); + + mr = FindMemoryRegion(addr, length); + if (mr == nullptr) { + done(errors::Unavailable("Cannot find pinned memory region")); + return; + } + + buffer->Ref(); + } else { + buffer->Ref(); } - buffer->Ref(); TensorKey tensor_key = next_key_++; { mutex_lock l(server_mu_); @@ -470,7 +480,7 @@ void 
GdrMemoryManager::TransportOptionsFromTensor( uint64_t checksum = 0; if (VLOG_IS_ON(2)) { #ifdef GOOGLE_CUDA - if (!on_host) { + if (device->tensorflow_gpu_device_info() && !on_host) { checksum = GPUUtil::Checksum(device, device_context, tensor); } else { checksum = GPUUtil::Checksum(tensor); @@ -508,7 +518,8 @@ void GdrMemoryManager::TensorFromTransportOptions( Tensor host_copy; #if GOOGLE_CUDA if (mr == nullptr && !on_host) { - Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0); + Allocator* alloc = + GPUProcessState::singleton()->GetCUDAHostAllocator(numa_node_); host_copy = Tensor(alloc, tensor->dtype(), tensor->shape()); buffer = DMAHelper::buffer(&host_copy); addr = buffer->data(); @@ -518,8 +529,18 @@ void GdrMemoryManager::TensorFromTransportOptions( #endif // GOOGLE_CUDA if (mr == nullptr) { - done(errors::Unavailable("Cannot find pinned memory region")); - return; + Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_); + host_copy = Tensor(alloc, tensor->dtype(), tensor->shape()); + + buffer = DMAHelper::buffer(&host_copy); + addr = buffer->data(); + length = buffer->size(); + + mr = FindMemoryRegion(addr, length); + if (mr == nullptr) { + done(errors::Unavailable("Cannot find pinned memory region")); + return; + } } decltype(clients_)::iterator iter; @@ -568,7 +589,8 @@ void GdrMemoryManager::TensorFromTransportOptions( } #if GOOGLE_CUDA - if (host_copy.NumElements() > 0) { + if (device->tensorflow_gpu_device_info() && !on_host && + host_copy.NumElements() > 0) { uint64_t checksum = 0; if (VLOG_IS_ON(2)) { checksum = GPUUtil::Checksum(host_copy); @@ -598,6 +620,12 @@ void GdrMemoryManager::TensorFromTransportOptions( } #endif // GOOGLE_CUDA + if ((on_host || !device->tensorflow_gpu_device_info()) && + host_copy.NumElements() > 0) { + std::memcpy(DMAHelper::buffer(tensor)->data(), addr, length); + VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer"; + } + uint64_t end = Env::Default()->NowMicros(); 
VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey() @@ -607,7 +635,7 @@ void GdrMemoryManager::TensorFromTransportOptions( uint64_t checksum = 0; if (VLOG_IS_ON(2)) { #ifdef GOOGLE_CUDA - if (device->tensorflow_gpu_device_info() && (!on_host)) { + if (device->tensorflow_gpu_device_info() && !on_host) { checksum = GPUUtil::Checksum(device, device_context, *tensor); } else { checksum = GPUUtil::Checksum(*tensor); @@ -668,7 +696,8 @@ ibv_mr* GdrMemoryManager::FindMemoryRegion(void* addr, size_t length) { } } -void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) { +void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length, + const std::string& allocator_name) { if (length == 0) return; ibv_mr* mr = rdma_reg_read(listening_.get(), addr, length); if (mr != nullptr) { @@ -676,7 +705,8 @@ void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) { auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator); mrs_.insert(iter, {mr, &MRDeleter}); } else { - LOG(WARNING) << "Cannot register memory region"; + LOG(WARNING) << "Cannot register memory region allocated by " + << allocator_name; } } -- GitLab From 38808119e9d5f8ad24bb414aab281e0fa3fde6dc Mon Sep 17 00:00:00 2001 From: Gautam Date: Tue, 2 Oct 2018 11:56:06 +0530 Subject: [PATCH 019/411] Update backend.py Adding missing import files in the commented examples. 
When trying out that particular example in commented section the TensorFlow and numpy imports are missing --- tensorflow/python/keras/backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 584facc859..79ca4beb73 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -773,6 +773,8 @@ def is_keras_tensor(x): Examples: ```python + >>> import tensorflow as tf + >>> import numpy >>> from keras import backend as K >>> from keras.layers import Input, Dense >>> np_var = numpy.array([1, 2]) -- GitLab From a12b8c4afdca3ac2945d62b3b83ca2599ab360f9 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sun, 16 Sep 2018 18:39:50 -0400 Subject: [PATCH 020/411] [xla] Improve validation of Broadcast shape If one misreads the semantics of this instruction, it's easy to cause an out of bounds access into the dimensions here. Add an extra check to return a proper error to the user rather than crashing in that case.
Ref #22130 --- tensorflow/compiler/xla/service/hlo_verifier.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 50f39cbcb5..0f6ecd42f6 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -313,8 +313,9 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) { operand_dimension < ShapeUtil::Rank(operand_shape); ++operand_dimension) { int64 output_dimension = broadcast->dimensions()[operand_dimension]; - TF_RET_CHECK(broadcast->shape().dimensions(output_dimension) == - operand_shape.dimensions(operand_dimension)) + TF_RET_CHECK((output_dimension < ShapeUtil::Rank(broadcast->shape())) && + (broadcast->shape().dimensions(output_dimension) == + operand_shape.dimensions(operand_dimension))) << broadcast->ToString() << " operand shape " << operand_shape; } return Status::OK(); -- GitLab From 946e58e402778606d26056f5decf91ecfb4a9f89 Mon Sep 17 00:00:00 2001 From: YongJoon Lee Date: Wed, 3 Oct 2018 16:43:55 +0900 Subject: [PATCH 021/411] fix spelling problem --- .../contrib/estimator/python/estimator/boosted_trees.py | 6 +++--- .../estimator/python/estimator/dnn_linear_combined.py | 2 +- .../python/estimator/dnn_with_layer_annotations.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index a1f1c5f3d7..b131ed4f12 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -75,7 +75,7 @@ class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase): # pylint: layer. head: the `Head` instance defined for Estimator. model_dir: Directory to save model parameters, graph and etc. 
This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing @@ -199,7 +199,7 @@ def boosted_trees_classifier_train_in_memory( the model. All items in the set should be instances of classes derived from `FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. n_classes: number of label classes. Default is binary classification. Multiclass support is not yet implemented. @@ -345,7 +345,7 @@ def boosted_trees_regressor_train_in_memory( the model. All items in the set should be instances of classes derived from `FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. label_dimension: Number of regression targets per example. Multi-dimensional support is not yet implemented. diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py index 724bc2c82f..4e7965ef26 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py @@ -118,7 +118,7 @@ class DNNLinearCombinedEstimator(estimator.Estimator): head: A `_Head` instance constructed with a method such as `tf.contrib.estimator.multi_label_head`. model_dir: Directory to save model parameters, graph and etc. 
This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. linear_feature_columns: An iterable containing all the feature columns used by linear part of the model. All items in the set must be diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py index 6ca7aaf989..40a91175b7 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py @@ -248,7 +248,7 @@ def DNNClassifierWithLayerAnnotations( # pylint: disable=invalid-name model. All items in the set should be instances of classes derived from `_FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can also - be used to load checkpoints from the directory into a estimator to + be used to load checkpoints from the directory into an estimator to continue training a previously saved model. n_classes: Number of label classes. Defaults to 2, namely binary classification. Must be > 1. 
-- GitLab From 1f1fe5a01af616707b8554d59651fb4925d7faee Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 3 Oct 2018 22:23:08 -0700 Subject: [PATCH 022/411] Include .inc files for absl headers --- tensorflow/tools/pip_package/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d864a7a039..54a7b7ffbe 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -232,6 +232,8 @@ headers = (list(find_files('*.h', 'tensorflow/core')) + list(find_files('*', 'third_party/eigen3')) + list(find_files('*.h', 'tensorflow/include/external/com_google_absl')) + + list(find_files('*.inc', + 'tensorflow/include/external/com_google_absl')) + list(find_files('*', 'tensorflow/include/external/eigen_archive'))) setup( -- GitLab From 3302b4c1fcf2ecd3ae3119cddb16d057235ece07 Mon Sep 17 00:00:00 2001 From: Tingbo Lu Date: Fri, 5 Oct 2018 00:02:45 +0800 Subject: [PATCH 023/411] Update rnn_cell.py --- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 59a61af7b3..e8073f8463 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -1110,7 +1110,7 @@ _Linear = core_rnn_cell._Linear # pylint: disable=invalid-name class AttentionCellWrapper(rnn_cell_impl.RNNCell): """Basic attention cell wrapper. - Implementation based on https://arxiv.org/abs/1409.0473. + Implementation based on https://arxiv.org/abs/1601.06733. """ def __init__(self, -- GitLab From f410ffc1699e864e84857089183db0d952ada7fe Mon Sep 17 00:00:00 2001 From: Andreas Madsen Date: Thu, 26 Jul 2018 15:44:39 +0200 Subject: [PATCH 024/411] make sparsemax nan and infinity safe logits that are -inf will be given 0 probability and logits that are inf will result in a nan output. 
Likewise if all logits are -inf the output will also be nan. This is done by using where operators, mostly because 0 * inf = nan and x/0 = sign(x) inf following the IEEE 754 standard. However these results are not mathematically correct in the context of the sparsemax algorithm. Fixes: https://github.com/tensorflow/tensorflow/issues/15564 --- .../kernel_tests/sparsemax_loss_test.py | 64 +++++++++++++++++++ .../python/kernel_tests/sparsemax_test.py | 63 +++++++++++++++++- .../contrib/sparsemax/python/ops/sparsemax.py | 30 ++++++++- .../sparsemax/python/ops/sparsemax_loss.py | 32 ++++++++-- 4 files changed, 178 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py index 360e7dbe75..2db76a6d56 100644 --- a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py +++ b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py @@ -109,6 +109,66 @@ class SparsemaxLossTest(test.TestCase): np_loss, tf_loss_out, half_atol=1e-2, half_rtol=5e-3) self.assertShapeEqual(np_loss, tf_loss_op) + def _test_sparsemax_loss_of_nan(self, dtype, random, use_gpu): + """check sparsemax-loss transfers nan""" + q = np.asarray([ + [0, 0, 1], + [0, 0, 1], + [0, 0, 1] + ]) + z_nan = np.asarray([ + [0, np.nan, 0], + [0, np.nan, np.nan], + [np.nan, np.nan, np.nan] + ]).astype(dtype) + + _, tf_loss_nan = self._tf_sparsemax_loss(z_nan, q, dtype, use_gpu) + self.assertAllCloseAccordingToType( + [np.nan, np.nan, np.nan], + tf_loss_nan) + + def _test_sparsemax_loss_of_inf(self, dtype, random, use_gpu): + """check sparsemax-loss is infinity safe""" + q = np.asarray([ + [0, 0, 1], + [0, 0, 1], + [0, 0, 1], + [0, 0, 1] + ]) + z_neg = np.asarray([ + [0, -np.inf, 0], + [0, -np.inf, -np.inf], + [-np.inf, -np.inf, 0], + [-np.inf, -np.inf, -np.inf], + ]).astype(dtype) + z_pos = np.asarray([ + [0, np.inf, 0], + [0, np.inf, np.inf], + [np.inf, 
np.inf, 0], + [np.inf, np.inf, np.inf] + ]).astype(dtype) + z_mix = np.asarray([ + [0, np.inf, 0], + [0, np.inf, -np.inf], + [-np.inf, np.inf, 0], + [-np.inf, np.inf, -np.inf] + ]).astype(dtype) + + _, tf_loss_neg = self._tf_sparsemax_loss(z_neg, q, dtype, use_gpu) + self.assertAllCloseAccordingToType( + [0.25, np.inf, 0, np.nan], + tf_loss_neg) + + _, tf_loss_pos = self._tf_sparsemax_loss(z_pos, q, dtype, use_gpu) + self.assertAllCloseAccordingToType( + [np.nan, np.nan, np.nan, np.nan], + tf_loss_pos) + + _, tf_loss_mix = self._tf_sparsemax_loss(z_mix, q, dtype, use_gpu) + self.assertAllCloseAccordingToType( + [np.nan, np.nan, np.nan, np.nan], + tf_loss_mix) + def _test_constant_add(self, dtype, random, use_gpu): """check sparsemax-loss proposition 3""" z = random.uniform(low=-3, high=3, size=(test_obs, 10)) @@ -198,6 +258,10 @@ class SparsemaxLossTest(test.TestCase): self._test_sparsemax_loss_against_numpy(dtype, random, use_gpu=False) + self._test_sparsemax_loss_of_nan(dtype, random, use_gpu=False) + + self._test_sparsemax_loss_of_inf(dtype, random, use_gpu=False) + self._test_constant_add(dtype, random, use_gpu=False) self._test_sparsemax_loss_positive(dtype, random, use_gpu=False) diff --git a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py index 259e62bd86..38c6dd15db 100644 --- a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py +++ b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py @@ -87,6 +87,61 @@ class SparsemaxTest(test.TestCase): p_sparemax, tf_sparsemax_out, half_atol=5e-3) self.assertShapeEqual(p_sparemax, tf_sparsemax_op) + def _test_sparsemax_of_nan(self, dtype, random, use_gpu): + """check sparsemax transfers nan""" + z_nan = np.asarray([ + [0, np.nan, 0], + [0, np.nan, np.nan], + [np.nan, np.nan, np.nan], + ]).astype(dtype) + + _, tf_sparsemax_nan = self._tf_sparsemax(z_nan, dtype, use_gpu) + 
self.assertAllCloseAccordingToType([ + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan] + ], tf_sparsemax_nan) + + def _test_sparsemax_of_inf(self, dtype, random, use_gpu): + """check sparsemax is infinity safe""" + z_neg = np.asarray([ + [0, -np.inf, 0], + [0, -np.inf, -np.inf], + [-np.inf, -np.inf, -np.inf], + ]).astype(dtype) + z_pos = np.asarray([ + [0, np.inf, 0], + [0, np.inf, np.inf], + [np.inf, np.inf, np.inf] + ]).astype(dtype) + z_mix = np.asarray([ + [0, np.inf, 0], + [0, np.inf, -np.inf], + [-np.inf, np.inf, -np.inf] + ]).astype(dtype) + + _, tf_sparsemax_neg = self._tf_sparsemax(z_neg, dtype, use_gpu) + self.assertAllCloseAccordingToType([ + [0.5, 0, 0.5], + [1, 0, 0], + [np.nan, np.nan, np.nan] + ], tf_sparsemax_neg) + + _, tf_sparsemax_pos = self._tf_sparsemax(z_pos, dtype, use_gpu) + self.assertAllCloseAccordingToType([ + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan] + ], tf_sparsemax_pos) + + _, tf_sparsemax_mix = self._tf_sparsemax(z_mix, dtype, use_gpu) + self.assertAllCloseAccordingToType([ + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan] + ], tf_sparsemax_mix) + + def _test_sparsemax_of_zero(self, dtype, random, use_gpu): """check sparsemax proposition 1, part 1""" z = np.zeros((1, 10)) @@ -97,7 +152,7 @@ class SparsemaxTest(test.TestCase): self.assertAllCloseAccordingToType(p_sparemax, tf_sparsemax_out) self.assertShapeEqual(p_sparemax, tf_sparsemax_op) - def _test_sparsemax_of_inf(self, dtype, random, use_gpu): + def _test_sparsemax_of_to_inf(self, dtype, random, use_gpu): """check sparsemax proposition 1, part 2""" z = random.uniform(low=-3, high=3, size=(test_obs, 10)) @@ -210,10 +265,14 @@ class SparsemaxTest(test.TestCase): self._test_sparsemax_against_numpy(dtype, random, use_gpu=False) - self._test_sparsemax_of_zero(dtype, random, use_gpu=False) + self._test_sparsemax_of_nan(dtype, random, use_gpu=False) 
self._test_sparsemax_of_inf(dtype, random, use_gpu=False) + self._test_sparsemax_of_zero(dtype, random, use_gpu=False) + + self._test_sparsemax_of_to_inf(dtype, random, use_gpu=False) + self._test_constant_add(dtype, random, use_gpu=False) self._test_permutation(dtype, random, use_gpu=False) diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py index e617af2ff1..f903b629c7 100644 --- a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py +++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py @@ -49,7 +49,14 @@ def sparsemax(logits, name=None): obs = array_ops.shape(logits)[0] dims = array_ops.shape(logits)[1] - z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis] + # In the paper, they call the logits z. + # The mean(logits) can be subtracted from logits to make the algorithm + # more numerically stable. The instability in this algorithm comes mostly + # from the z_cumsum. Subtracting the mean will cause z_cumsum to be close + # to zero. However, in practice the numerical instability issues are very + # minor and subtracting the mean causes extra issues with inf and nan + # input. + z = logits # sort z z_sorted, _ = nn.top_k(z, k=dims) @@ -64,10 +71,27 @@ def sparsemax(logits, name=None): k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1) # calculate tau(z) - indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1) + # If there are inf values or all values are -inf, the k_z will be zero, + # this is mathematically invalid and will also cause the gather_nd to fail. + # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then + # fixed later (see p_safe) by returning p = nan. This results in the same + # behavior as softmax.
+ k_z_safe = math_ops.maximum(k_z, 1) + indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1], axis=1) tau_sum = array_ops.gather_nd(z_cumsum, indices) tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype) # calculate p - return math_ops.maximum( + p = math_ops.maximum( math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis]) + # If k_z = 0 or if z = nan, then the input is invalid + p_safe = array_ops.where( + math_ops.logical_or( + math_ops.equal(k_z, 0), + math_ops.is_nan(z_cumsum[:, -1]) + ), + array_ops.fill([obs, dims], math_ops.cast(float('nan'), logits.dtype)), + p + ) + + return p_safe diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py index 582d1e6136..9095cfe267 100644 --- a/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py +++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py @@ -47,14 +47,34 @@ def sparsemax_loss(logits, sparsemax, labels, name=None): sparsemax = ops.convert_to_tensor(sparsemax, name="sparsemax") labels = ops.convert_to_tensor(labels, name="labels") - shifted_logits = logits - \ - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis] + # In the paper, they call the logits z. + # A constant can be subtracted from logits to make the algorithm + # more numerically stable in theory. However, there are really no major + # sources of numerical instability in this algorithm. + z = logits # sum over support - support = math_ops.cast(sparsemax > 0, sparsemax.dtype) - sum_s = support * sparsemax * (shifted_logits - 0.5 * sparsemax) + # Use a conditional where instead of a multiplication to support z = -inf. + # If z = -inf, and there is no support (sparsemax = 0), a multiplication + # would cause 0 * -inf = nan, which is not correct in this case.
+ sum_s = array_ops.where( + math_ops.logical_or(sparsemax > 0, math_ops.is_nan(sparsemax)), + sparsemax * (z - 0.5 * sparsemax), + array_ops.zeros_like(sparsemax) + ) # - z_k + ||q||^2 - q_part = labels * (0.5 * labels - shifted_logits) + q_part = labels * (0.5 * labels - z) + # Fix the case where labels = 0 and z = -inf, where q_part would + # otherwise be 0 * -inf = nan. But since the labels = 0, no cost for + # z = -inf should be considered. + # The code below also covers the case where z = inf. However, in this + # case the sparsemax will be nan, which means the sum_s will also be nan, + # therefore this case doesn't need additional special treatment. + q_part_safe = array_ops.where( + math_ops.logical_and(math_ops.equal(labels, 0), math_ops.is_inf(z)), + array_ops.zeros_like(z), + q_part + ) - return math_ops.reduce_sum(sum_s + q_part, axis=1) + return math_ops.reduce_sum(sum_s + q_part_safe, axis=1) -- GitLab From efcf11cd44dfe8ddc441aa58f1b21ff7c8444568 Mon Sep 17 00:00:00 2001 From: shengfuintel Date: Fri, 5 Oct 2018 13:47:52 -0700 Subject: [PATCH 025/411] Clean up the code under INTEL_MKL_ML_ONLY --- tensorflow/core/graph/mkl_layout_pass.cc | 2177 +---------------- tensorflow/core/graph/mkl_layout_pass_test.cc | 1865 -------------- 2 files changed, 1 insertion(+), 4041 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 7394b1cddf..42a35727db 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -45,2181 +45,6 @@ limitations under the License.
namespace tensorflow { -#ifdef INTEL_MKL_ML_ONLY - -// This pass implements rewriting of graph to support following scenarios: -// (A) Merging nodes in the graph -// (B) Rewriting a node in the graph to a new node -// Rewrite happens under following 2 scenarios: -// 1) Propagating Mkl layout as an additional output tensor -// (we will loosely call a tensor that carries Mkl layout as Mkl tensor -// henceforth.) from every Mkl supported NN layer. -// 2) Context-based rewrite: This is needed in order to optimize -// gradient ops of Conv2D+AddBias. Gradient op of both the Conv2D and -// MatMul is BiasAddGrad, and we need to rewrite BiasAddGrad into -// Conv2D-specific BiasAddGrad, and MatMul-specific BiasAddGrad. -// This is context-specific optimization, where the context is the -// forward operator that the BiasAddGrad corresponds to. -// -// Example of A : Merging nodes in the graph -// ----------------------------------------- -// Currently, we merge Conv2D+AddBias together. Consider Conv2D and BiasAdd as: -// -// O = Conv2D(A, B) -// P = BiasAdd(O, C) -// -// We merge them into Conv2DWithBias as: -// P = _MklConv2DWithBias(A, A_m, B, B_m, C, C_m) -// -// The meaning of A_m, B_m and C_m is explained in B.1. -// -// Merge rules: -// - The merge for Conv2D and BiasAdd happens when the output of Conv2D _only_ -// goes to BiasAdd. -// - Also, the intersection of attributes of both the nodes must have same -// values. -// - Both the nodes must have been assigned to same device (if any). -// -// Example of B.1 : Rewriting nodes to Mkl nodes -// --------------------------------------------- -// Consider a Relu node. Current definition of Relu node looks like: -// -// O = Relu(A) -// -// Relu has 1 input (A), and 1 output (O). -// -// This rewrite pass will generate a new graph node for Relu (new node is -// called MklRelu) as: -// -// O, O_m = MklRelu(A, A_m) -// -// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). 
Here input A is -// same as input A of Relu; output O is same as output O of Relu. O_m is the -// additional output tensor that will be set by MklRelu, and it represents -// Mkl tensor corresponding to O -- in other words, O_m is some kind of -// metadata for O. A_m is additional input of Relu, and it represents metadata -// for A - as O_m is metadata for O, A_m is metadata for A. MklRelu receives -// this metadata from previous node in the graph. -// -// When a previous node in the graph is an Mkl node, A_m will represent a valid -// Mkl tensor. But when a previous node is not an Mkl node, A_m will represent -// a dummy Mkl tensor. -// -// Rewriting rules: -// - Selection of a node for rewriting happens by registering the op type of -// the node with the rewriting pass. If the op type is not registered, then -// all nodes of this op type will not be rewritten. -// - Number of inputs after rewriting: -// Since for every input Tensorflow tensor, the rewritten node gets Mkl -// tensor(s), rewritten node gets 2*N inputs, where N is the number of -// inputs for the original node. -// - Number of outputs after rewriting: -// Since for every output Tensorflow tensor, the rewritten node generates -// Mkl tensor(s), the rewritten node generates 2*N outputs, where N is the -// number of outputs of the original node. -// - Ordering of Tensorflow tensors and Mkl tensors: -// Since every rewritten node generates twice the number of inputs and -// outputs, one could imagine various orderings among Tensorflow tensors -// and Mkl tensors. E.g., assume an op 'Conv2D' that takes (A, B) as -// inputs, then the new op '_MklConv2D' can take inputs A, B, A_m and B_m -// in A, A_m, B, B_m order or it can also take them in A, B, A_m, B_m -// order. Among N inputs one can get N! permutations. -// -// So the question is: which order do we follow? We support 2 types of -// orderings: (1) interleaved, and (2) contiguous. 
Interleaved ordering -// follows an intuitive order where an Mkl tensor follows the -// corresponding Tensorflow tensor immediately. In the context of the -// above example, it will be: A, A_m, B, B_m. Note that the ordering rule -// applies to both the inputs and outputs. Contiguous ordering means -// all the Tensorflow tensors are contiguous followed by all the Mkl -// tensors. We use contiguous ordering as default. -// -// Graph rewrite algorithm: -// Algorithm: Graph Rewrite -// Input: Graph G, Names of the nodes to rewrite and their new names -// Output: Modified Graph G' if the nodes are modified, G otherwise. -// Start: -// N = Topological_Sort(G) // N is a set of nodes in toposort order. -// foreach node n in N -// do -// if (Is_MKL_Op(n)) // Can this node accept an Mkl layout as input. -// then -// E = set of of n -// E' = {} // a new set of edges for rewritten node -// foreach in E -// do -// E' U {} // First copy edge which generates Tensorflow -// // tensor as it is -// m = Source node of edge e -// if Is_Rewritten(m) // Did we rewrite this node in this pass? -// then -// E' U {} // If yes, then m will generate an Mkl -// // tensor as an additional output. -// else -// d = Generate_Dummy_Mkl_Tensor() // If not, generate a dummy -// // Mkl tensor. -// E' U {} // The dummy Mkl tensor has only 1 output slot. -// fi -// done -// n' = Build_New_Node(G,new_name,E') -// Mark_Rewritten(n') // Mark the new node as being rewritten. -// fi -// done -// -// Explanation: -// For graph rewrite, we visit nodes of the input graph in the -// topological sort order. With this ordering, we visit nodes in the -// top-to-bottom fashion. We need this order because while visiting a -// node we want that all of its input nodes are visited and rewritten if -// applicable. This is because if we need to rewrite a given node -// then all of its input nodes need to be fixed (in other words they -// cannot be deleted later.) 
-// -// While visiting a node, we first check if the op type of the node is -// an Mkl op. If it is, then we rewrite that node after constructing -// new inputs to the node. If the op type of the node is not Mkl op, -// then we do not rewrite that node. -// -// Handling workspace propagation for certain ops: -// -// Certain backward ops in MKL (MaxPool, LRN and BatchNorm) require -// passing of a workspace from their respective forward ops. Workspace -// tensors provide memory for storing results of intermediate operations -// which are helpful in backward propagation. TensorFlow does not have -// a notion of a workspace and as a result does not allow producing -// additional outputs from these forward ops. For these ops, we need -// to add 2 extra edges between forward ops and their corresponding -// backward ops - the first extra edge carries a workspace tensor and -// the second one carries an Mkl tensor for the workspace tensor. -// -// Example: -// -// Typical graph for MaxPool and its gradient looks like: -// -// A = MaxPool(T) -// B = MaxPoolGrad(X, A, Y) -// -// We will transform this graph to propagate the workspace as: -// (with the contiguous ordering) -// -// A, W, A_m, W_m = MklMaxPool(T, T_m) -// B, B_m = MklMaxPoolGrad(X, A, Y, W, X_m, A_m, Y_m, W_m) -// -// Here W is the workspace tensor. Transformed tensor names with the -// suffix _m are Mkl tensors, and this transformation has been done -// using the algorithm discussed earlier. The transformation for -// workspace propagation only adds extra outputs (W, W_m) for a forward -// op and connects them to the corresponding backward ops. 
-// -// Terms: -// -// Forward op name = name of the op in the forward pass -// where a workspace tensor originates (MaxPool in this example) -// Backward op name = name of the op in the backward pass that receives -// a workspace tensor from the forward op (MaxPoolGrad in the example) -// Slot = Position of the output or input slot that will be -// used by the workspace tensor (1 for MklMaxPool as W is the 2nd -// output of MaxPool (0 is 1st); 3 for MklMaxPoolGrad) -// -// Question: -// -// How do we associate a backward op to a forward op? There can be more -// than one op with the exact same name. -// -// In this example, we associate MaxPoolGrad with MaxPool. But there -// could be more than one MaxPool ops. To solve this problem, we look -// for _direct_ edge between a forward op and a backward op (tensor A is -// flowing along this edge in the example). -// -// How do we transform forward and backward ops when there is no direct -// edge between them? In such a case, we generate dummy tensors for -// workspace tensors. For the example, transformation of MaxPool will -// be exactly same as it would be when there is a direct edge between -// the forward and the backward op --- it is just that MaxPool won't -// generate any workspace tensor. For MaxPoolGrad, the transformation -// will also be same, but instead of connecting W and W_m with the -// outputs of MaxPool, we will produce dummy tensors for them, and we -// will set workspace_enabled attribute to false. -// -// Example of B.2 : Context-based node rewrite -// ------------------------------------------- -// Consider BiasAddGrad op as: -// -// O = _MklConv2D(A, B, C, A_m, B_m, C_m) -// P = BiasAddGrad(O) -// -// Then we rewrite it as: -// -// P = Conv2DWithBiasBackpropBias(O, O_m) -// -// Rewrite of BiasAddGrad into Conv2DWithBiasBackpropBias takes place depending -// on the matching 'context'. The term context is loosely related to which -// forward op is _associated_ to BiasAddGrad. 
If it is _MklConv2DWithBias then -// we consider it Conv2D context; if it is MatMul, then it is MatMul context. - -class MklLayoutRewritePass : public GraphOptimizationPass { - public: - MklLayoutRewritePass() { - // NOTE: names are alphabetically sorted. - csinfo_.addn = "AddN"; - csinfo_.avg_pool = "AvgPool"; - csinfo_.avg_pool_grad = "AvgPoolGrad"; - csinfo_.bias_add = "BiasAdd"; - csinfo_.bias_add_grad = "BiasAddGrad"; - csinfo_.concat = "Concat"; - csinfo_.concatv2 = "ConcatV2"; - csinfo_.conv2d = "Conv2D"; - csinfo_.conv2d_grad_input = "Conv2DBackpropInput"; - csinfo_.conv2d_grad_filter = "Conv2DBackpropFilter"; - csinfo_.fused_batch_norm = "FusedBatchNorm"; - csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad"; - csinfo_.identity = "Identity"; - csinfo_.lrn = "LRN"; - csinfo_.lrn_grad = "LRNGrad"; - csinfo_.matmul = "MatMul"; - csinfo_.max_pool = "MaxPool"; - csinfo_.max_pool_grad = "MaxPoolGrad"; - csinfo_.mkl_conv2d = "_MklConv2D"; - csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput"; - csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter"; - csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias"; - csinfo_.mkl_conv2d_with_bias_backprop_bias = - "_MklConv2DWithBiasBackpropBias"; - csinfo_.relu = "Relu"; - csinfo_.relu_grad = "ReluGrad"; - csinfo_.reshape = "Reshape"; - csinfo_.split = "Split"; - // Element-wise ops. Ensure you also add any new ops to IsOpElementWise - // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the - // MklInputConversion op is added before it. - csinfo_.add = "Add"; - csinfo_.maximum = "Maximum"; - csinfo_.mul = "Mul"; - csinfo_.squared_difference = "SquaredDifference"; - csinfo_.sub = "Sub"; - // End - element-wise ops. See note above. - - // NOTE: names are alphabetically sorted. 
- rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn), - CopyAttrsAddN, AddNRewrite, nullptr}); - rinfo_.push_back({csinfo_.add, mkl_op_registry::GetMklOpName(csinfo_.add), - CopyAttrsDataType, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.avg_pool, - mkl_op_registry::GetMklOpName(csinfo_.avg_pool), - CopyAttrsPooling, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.avg_pool_grad, - mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad), - CopyAttrsPooling, AlwaysRewrite, nullptr}); - // BiasAddGrad gets written into Conv2DWithBiasBackpropBias depending - // on if context contains Conv2D. - rinfo_.push_back({csinfo_.bias_add_grad, - csinfo_.mkl_conv2d_with_bias_backprop_bias, - CopyAttrsBiasAddGrad, ContextMatchRewrite, - &biasaddgrad_conv2dwithbias_context_}); - // BiasAddGrad gets written into BiasAddGrad depending on if context - // contains MatMul. - rinfo_.push_back({csinfo_.bias_add_grad, csinfo_.matmul, - CopyAttrsBiasAddGrad, ContextMatchRewrite, - &biasaddgrad_matmul_context_}); - rinfo_.push_back({csinfo_.concat, - mkl_op_registry::GetMklOpName(csinfo_.concat), - CopyAttrsConcat, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.concatv2, - mkl_op_registry::GetMklOpName(csinfo_.concatv2), - CopyAttrsConcatV2, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.conv2d, - mkl_op_registry::GetMklOpName(csinfo_.conv2d), - CopyAttrsConv2D, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.conv2d_grad_filter, - mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter), - CopyAttrsConv2D, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.conv2d_grad_input, - mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input), - CopyAttrsConv2D, AlwaysRewrite, nullptr}); - - rinfo_.push_back({csinfo_.fused_batch_norm, - mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm), - CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr}); - rinfo_.push_back( - {csinfo_.fused_batch_norm_grad, - 
mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad), - CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.identity, - mkl_op_registry::GetMklOpName(csinfo_.identity), - CopyAttrsIdentity, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn), - CopyAttrsLRN, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.lrn_grad, - mkl_op_registry::GetMklOpName(csinfo_.lrn_grad), - CopyAttrsLRN, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.max_pool, - mkl_op_registry::GetMklOpName(csinfo_.max_pool), - CopyAttrsPooling, NonDepthBatchWisePoolRewrite, nullptr}); - rinfo_.push_back({csinfo_.max_pool_grad, - mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad), - CopyAttrsPooling, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.maximum, - mkl_op_registry::GetMklOpName(csinfo_.maximum), - CopyAttrsDataType, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.mul, mkl_op_registry::GetMklOpName(csinfo_.mul), - CopyAttrsDataType, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu), - CopyAttrsDataType, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.relu_grad, - mkl_op_registry::GetMklOpName(csinfo_.relu_grad), - CopyAttrsDataType, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.reshape, - mkl_op_registry::GetMklOpName(csinfo_.reshape), - CopyAttrsReshape, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.squared_difference, - mkl_op_registry::GetMklOpName(csinfo_.squared_difference), - CopyAttrsDataType, AlwaysRewrite, nullptr}); - rinfo_.push_back({csinfo_.sub, mkl_op_registry::GetMklOpName(csinfo_.sub), - CopyAttrsDataType, AlwaysRewrite, nullptr}); - - // Add info about which ops to add workspace edge to and the slots. 
- wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3}); - wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3}); - - // Add a rule for merging nodes - minfo_.push_back({csinfo_.mkl_conv2d, csinfo_.bias_add, 0, - csinfo_.mkl_conv2d_with_bias}); - - biasaddgrad_matmul_context_ = {csinfo_.bias_add_grad, csinfo_.matmul, - IsBiasAddGradInMatMulContext}; - - biasaddgrad_conv2dwithbias_context_ = { - csinfo_.bias_add_grad, csinfo_.mkl_conv2d_with_bias, - IsBiasAddGradInConv2DWithBiasContext}; - - cinfo_.push_back(&biasaddgrad_matmul_context_); - cinfo_.push_back(&biasaddgrad_conv2dwithbias_context_); - } - - // Standard interface to run pass - Status Run(const GraphOptimizationPassOptions& options); - - // Helper function which does most of heavy lifting for rewriting - // Mkl nodes to propagate Mkl tensor as additional output - // - // Extracts common functionality between Run public interface and - // test interface. - // - // @return true, if and only if graph is mutated; false otherwise. - bool RunPass(std::unique_ptr* g); - - /// Structure to specify the context information used in a node rewrite rule - typedef struct { - string node; // Name of the node to be rewritten - string fwd; // Name of the node in the forward pass that this node - // corresponds to - std::function context_match_fn; - } ContextInfo; - - /// Structure to specify the name of an original node, its new name after - /// rewrite, the number of inputs to the original node, the function to - /// be used to copy attributes for the op, and the rule (if any) which - /// must hold for rewriting the node - typedef struct { - string name; // Original name of op of the node in the graph - string new_name; // New name of the op of the node in the graph - // A function handler to copy attributes from an old node to a new node. 
- std::function copy_attrs; - // A rule under which to rewrite this node - std::function rewrite_rule; - // ContextInfo, if any, to be used for rewrite - ContextInfo* context; - } RewriteInfo; - - /// Structure to specify a forward op, a backward op, and the slot numbers - /// in the forward and backward ops where we will add a workspace edge. - typedef struct { - string fwd_op; // Name of a forward op in the graph - string bwd_op; // Name of a backward op in the graph - int fwd_slot; // Output slot in the forward op node where actual - // output tensor resides - int bwd_slot; // Input slot in the backward op node where actual - // input tensor resides - int ws_fwd_slot; // Output slot in the forward op node where workspace - // edge is added - int ws_bwd_slot; // Input slot in the backward op node where workspace - // edge is added - } WorkSpaceInfo; - - /// Structure to specify information used in node merge - typedef struct { - string pred; // Predecessor node string - string succ; // Successor node string - int op; // The operand no the predecessor node corresponds - // to the successor node - string new_node; // Name of the node after merge - } MergeInfo; - - /// Structure to store all constant strings - /// NOTE: names are alphabetically sorted. 
- typedef struct { - string addn; - string add; - string avg_pool; - string avg_pool_grad; - string bias_add; - string bias_add_grad; - string concat; - string concatv2; - string conv2d; - string conv2d_grad_input; - string conv2d_grad_filter; - string fused_batch_norm; - string fused_batch_norm_grad; - string identity; - string lrn; - string lrn_grad; - string matmul; - string max_pool; - string max_pool_grad; - string maximum; - string mkl_conv2d; - string mkl_conv2d_grad_input; - string mkl_conv2d_grad_filter; - string mkl_conv2d_with_bias; - string mkl_conv2d_with_bias_backprop_bias; - string mul; - string relu; - string relu_grad; - string reshape; - string split; - string squared_difference; - string sub; - } ConstStringsInfo; - - private: - /// Maintain info about nodes to rewrite - std::vector rinfo_; - - /// Maintain info about nodes to add workspace edge - std::vector wsinfo_; - - /// Maintain info about nodes to be merged - std::vector minfo_; - - /// Maintain info about nodes to rewrite - static std::vector cinfo_; - - /// Maintain structure of constant strings - static ConstStringsInfo csinfo_; - - /// Context variables used in referencing rules - static ContextInfo biasaddgrad_matmul_context_; - static ContextInfo biasaddgrad_conv2dwithbias_context_; - - private: - // Is OpDef::ArgDef a list type? It could be N * T or list(type). - // Refer to opdef.proto for details of list type. - inline bool ArgIsList(const OpDef::ArgDef& arg) const { - return !arg.type_list_attr().empty() || !arg.number_attr().empty(); - } - - // Get length of a list in 'n' if 'arg' is of list type. Refer to - // description of ArgIsList for definition of list type. - inline int GetTensorListLength(const OpDef::ArgDef& arg, Node* n) { - CHECK_EQ(ArgIsList(arg), true); - int N = 0; - const string attr_name = !arg.type_list_attr().empty() - ? 
arg.type_list_attr() - : arg.number_attr(); - if (!arg.type_list_attr().empty()) { - std::vector value; - TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &value)); - N = value.size(); - } else { - TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &N)); - } - return N; - } - - // Can op represented by node 'n' run on DEVICE_CPU? - // Op can run on CPU with MKL if the runtime assigned device or the - // user requested device contains device CPU, or both are empty. - bool CanOpRunOnCPUDevice(const Node* n) { - bool result = true; - string reason; - - // Substring that should be checked for in device name for CPU device. - const char* const kCPUDeviceSubStr = "CPU"; - - // If Op has been specifically assigned to a non-CPU device, then No. - if (!n->assigned_device_name().empty() && - !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) { - result = false; - reason = "Op has been assigned a runtime device that is not CPU."; - } - - // If user has specifically assigned this op to a non-CPU device, then No. - if (!n->def().device().empty() && - !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) { - result = false; - reason = "User has assigned a device that is not CPU."; - } - - if (result == false) { - VLOG(1) << "MklLayoutRewritePass: Skipping rewriting of the node " - << n->type_string() << ", reason: " << reason; - } - - // Otherwise Yes. - return result; - } - - // Return a node that can be merged with input node 'n' - // - // @return pointer to the node if we can find such a - // node. Otherwise, it returns nullptr. - Node* CheckForNodeMerge(const Node* n) const; - - // Merge predecessor node with its successor. - // Currently, we merge Conv2D with BiasAdd only. - // - // Input nodes succ and pred may be deleted if the call to - // this function is successful. Attempt to use the pointers - // after the call to function may result in undefined behaviors. 
- // - // @input g - input graph, succ - successor node, pred - predecessor node - // @return Status::OK(), if merging is successful and supported. - // Returns appropriate Status error code otherwise. - // Graph is updated in case nodes are merged. Otherwise, it is - // not updated. - Status MergeNode(std::unique_ptr* g, Node* succ, Node* pred); - - // Check if the node 'n' has any applicable rewrite rule - // We check for 2 scenarios for rewrite. - // - // @return RewriteInfo* for the applicable rewrite rule - const RewriteInfo* CheckForNodeRewrite(const Node* n) const; - - // Default rewrite rule to be used in scenario 1 for rewrite. - // @return - true (since we want to always rewrite) - static bool AlwaysRewrite(const Node* n, const ContextInfo* c = nullptr) { - return true; - } - - // Check if we are performing pooling on depth or batch. If it is, then we - // do not rewrite MaxPool node to Mkl version. - // @return - true (if it is not a depth/batch wise pooling case); - // false otherwise. - static bool NonDepthBatchWisePoolRewrite(const Node* n, - const ContextInfo* c) { - CHECK_NOTNULL(n); - - string data_format_str; - TensorFormat data_format; - std::vector ksize, strides; - CHECK_EQ(GetNodeAttr(n->def(), "ksize", &ksize).ok(), true); - CHECK_EQ(GetNodeAttr(n->def(), "strides", &strides).ok(), true); - CHECK_EQ(GetNodeAttr(n->def(), "data_format", &data_format_str).ok(), true); - CHECK_EQ(FormatFromString(data_format_str, &data_format), true); - - // Condition that specifies non-batch-wise and non-depth-wise pooling. 
- if (GetTensorDim(ksize, data_format, 'N') == 1 && - GetTensorDim(strides, data_format, 'N') == 1 && - GetTensorDim(ksize, data_format, 'C') == 1 && - GetTensorDim(strides, data_format, 'C') == 1) { - return true; - } - - return false; - } - - static bool AddNRewrite(const Node* n, const ContextInfo* c) { - CHECK_NOTNULL(n); - - int num; - CHECK_EQ(GetNodeAttr(n->def(), "N", &num).ok(), true); - - // Condition that specifies non-batch-wise and non-depth-wise pooling. - if (num == 2) { - return true; - } - - return false; - } - // Is BiasAddGrad node in 'n' is associated with Conv2DWithBias node - // specified in contextinfo 'ci'. Function updates fwd_node to point - // to Conv2DWithBias node if 'n' is associated with Conv2DWithBias. - // - // Association checks for one of the following graphs: - // - // Graph A: - // - // _ = Conv2DWithBias(F, I, _) - // .. - // _ = Conv2DBackpropFilter(F, _, G) - // _ = Conv2DBackpropInput(_, I, G) - // _ = BiasAddGrad(G) - // - // OR - // - // Graph B: - // - // _ = Conv2DWithBias(F, _, _) - // .. - // _ = Conv2DBackpropFilter(F, _, G) - // _ = BiasAddGrad(G) - // - // Here F, G, and I are graph nodes; _ represents graph nodes that we - // don't care here. - // - // @return - true (if BiasAddGrad is associated with Conv2DWithBias); - // false otherwise. - static bool IsBiasAddGradInConv2DWithBiasContext(const Node* n, - const Node** fwd_node, - void* ci) { - CHECK_NOTNULL(n); - CHECK_NOTNULL(fwd_node); - CHECK_NOTNULL(ci); - *fwd_node = nullptr; - - CHECK_EQ(n->type_string(), csinfo_.bias_add_grad); - - // Get the only 1 input of BiasAddGrad. - CHECK_EQ(n->num_inputs(), 1); - const Node* bias_add_grad_inp = nullptr; - TF_CHECK_OK(n->input_node(0, &bias_add_grad_inp)); - CHECK_NOTNULL(bias_add_grad_inp); - - // Check if this input also goes to BackpropFilter and BackpropInput - // as 3rd input. 
- bool found_backprop_input = false; - bool found_backprop_filter = false; - Node* backprop_filter_node = nullptr; - Node* backprop_input_node = nullptr; - - for (const Edge* e : bias_add_grad_inp->out_edges()) { - Node* third_input = nullptr; - if (e->dst()->type_string() == csinfo_.conv2d_grad_input || - e->dst()->type_string() == csinfo_.mkl_conv2d_grad_input) { - // Third input (index 2) of BackpropInput - TF_CHECK_OK(e->dst()->input_node(2, &third_input)); - // Third input (index 2) of BackpropInput must be same as the input - // of BiasAddGrad. - if (third_input == bias_add_grad_inp) { - found_backprop_input = true; - backprop_input_node = e->dst(); - } - } - - if (e->dst()->type_string() == csinfo_.conv2d_grad_filter || - e->dst()->type_string() == csinfo_.mkl_conv2d_grad_filter) { - // Third input (index 2) of BackpropFilter - TF_CHECK_OK(e->dst()->input_node(2, &third_input)); - // Third input (index 2) of BackpropFilter must be same as the input - // of BiasAddGrad. - if (third_input == bias_add_grad_inp) { - found_backprop_filter = true; - backprop_filter_node = e->dst(); - } - } - - // If we found both the nodes, then we can stop the search. - if (found_backprop_input && found_backprop_filter) { - break; - } - } - - // If BackpropFilter node is not found, then this is not - // Conv2DWithBias context. For 2nd graph in the example above, only - // BackpropFilter would be present. - if (!found_backprop_filter) { - return false; - } - - // Otherwise, we found the nodes. - CHECK_NOTNULL(backprop_filter_node); - if (found_backprop_input) { - CHECK_NOTNULL(backprop_input_node); - } - - // Now that we confirmed that this is Conv2DWithBias context, we need to - // get access to the forward node (Conv2DWithBias). 2nd input of - // Conv2DWithBias is same as the 2nd input of Conv2DBackpropInput; 1st - // input of Conv2DWithBias is same as the 1st input of Conv2DBackpropFilter - // (This comes from definition of gradient computation for Conv2D). 
- if (found_backprop_input) { - // Graph A in the example. - Node* second_inp_of_input = nullptr; - Node* first_inp_of_filter = nullptr; - TF_CHECK_OK(backprop_input_node->input_node(1, &second_inp_of_input)); - TF_CHECK_OK(backprop_filter_node->input_node(0, &first_inp_of_filter)); - CHECK_NOTNULL(second_inp_of_input); - CHECK_NOTNULL(first_inp_of_filter); - - // Now we need to find out Conv2DWithBias node from these input nodes. - // Conv2DWithBias node is the node that accepts both the nodes - // second_inp_of_input and first_inp_of_filter in 2nd and 1st input slots. - for (const Edge* fe : first_inp_of_filter->out_edges()) { - if (fe->dst()->type_string() == csinfo_.mkl_conv2d_with_bias && - fe->dst_input() == 0) { - for (const Edge* ie : second_inp_of_input->out_edges()) { - if (ie->dst()->type_string() == csinfo_.mkl_conv2d_with_bias && - ie->dst_input() == 1 && fe->dst() == ie->dst()) { - VLOG(1) << "MklLayoutRewritePass: found " - << fe->dst()->DebugString() - << " as the forward node for matching context, backward" - << " node is: " << n->DebugString(); - *fwd_node = fe->dst(); - return true; - } - } - } - } - } else { - // We did not find BackpropInput, so we work with BackpropFilter only. - // Graph B in the example. - Node* first_inp_of_filter = nullptr; - TF_CHECK_OK(backprop_filter_node->input_node(0, &first_inp_of_filter)); - CHECK_NOTNULL(first_inp_of_filter); - - // Now we need to find out Conv2DWithBias node from first input of - // BackpropFIlter. Conv2DWithBias node is the node that accepts - // first_inp_of_filter in 1st input slot. 
- for (const Edge* fe : first_inp_of_filter->out_edges()) { - if (fe->dst()->type_string() == csinfo_.mkl_conv2d_with_bias && - fe->dst_input() == 0) { - VLOG(1) << "MklLayoutRewritePass: found " << fe->dst()->DebugString() - << " as the forward node for matching context, backward" - << " node is: " << n->DebugString(); - *fwd_node = fe->dst(); - return true; - } - } - } - - return false; - } - - // Is BiasAddGrad node in 'n' is associated with MatMul node - // specified in contextinfo 'ci'. Function does not update fwd_node. - // - // @return - true (if BiasAddGrad is associated with MatMul); - // false otherwise. - static bool IsBiasAddGradInMatMulContext(const Node* n, const Node** fwd_node, - void* ci) { - return (!IsBiasAddGradInConv2DWithBiasContext(n, fwd_node, ci)); - } - - // Rewrite rule that uses context-information for matching, - // used in scenario 2. - // - // @input - Node 'n' for which to search for matching context - // @input - The context 'c' under which to rewrite - // @return - true if we can rewrite node under context 'c'; - // false otherwise. - static bool ContextMatchRewrite(const Node* n, const ContextInfo* c); - - // Helper function that searches the matching contextinfo for the node. - // - // @input n - Node (gradient op) whose contextinfo is to be searched, - // fwd_node - pointer to node from the forward pass that this node - // belongs to. fwd_node cannot be NULL. - // @return Matching contextinfo in case a match is found; null otherwise. - // Also updates *fwd_node with pointer to forward node that this - // context matches. - static const ContextInfo* SearchMatchingContext(const Node* n, - const Node** fwd_node); - - // Rewrites input node to a new node specified by its matching rewrite info. - // - // Method first searches matching rewrite info for input node and then - // uses that info to rewrite. - // - // Input node may be deleted in case of rewrite. 
Attempt to use the node - // after the call can result in undefined behaviors. - // - // @input g - input graph, n - Node to be rewritten, - // ri - matching rewriteinfo - // @return Status::OK(), if the input node is rewritten; - // Returns appropriate Status error code otherwise. - // Graph is updated in case the input node is rewritten. - // Otherwise, it is not updated. - Status RewriteNode(std::unique_ptr* g, Node* n, const RewriteInfo* ri); - - // Get nodes that will feed a list of TF tensors to the new - // node that we are constructing. - // - // @input g - input graph, - // @input inputs - inputs to old node that we are using for constructing - // new inputs, - // @input input_idx - the index in the 'inputs' vector pointing to the - // current input that we have processed so far - // @output input_idx - index will be incremented by the number of nodes - // from 'inputs' that are processed - // @input list_length - The expected length of list of TF tensors - // @output output_nodes - the list of new nodes creating TF tensors - // - // @return None - void GetNodesProducingTFTensorList( - const gtl::InlinedVector, 4>& inputs, - int* input_idx, int list_length, - std::vector* output_nodes); - - // Get nodes that will feed a list of Mkl tensors to the new - // node that we are constructing. 
- // - // @input g - input graph, - // @input orig_node - Original node that we are rewriting - // @input inputs - inputs to old node that we are using for constructing - // new inputs, - // @input input_idx - the index in the 'inputs' vector pointing to the - // current input that we have processed so far - // @output input_idx - index will be incremented by the number of nodes - // from 'inputs' that are processed - // @input list_length - The expected length of list of Mkl tensors - // @output output_nodes - the list of new nodes creating Mkl tensors - // - // @return None - void GetNodesProducingMklTensorList( - std::unique_ptr* g, Node* orig_node, - const gtl::InlinedVector, 4>& inputs, - int* input_idx, int list_length, - std::vector* output_nodes); - - // Get a node that will feed an Mkl tensor to the new - // node that we are constructing. The output node could be (1) 'n' - // if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor - // if 'n' is not an Mkl layer. - // - // @input g - input graph, - // @input orig_node - Original node that we are rewriting, - // @input n - Node based on which we are creating Mkl node, - // @input n_output_slot - the output slot of node 'n' - // which is feeding to the node that we are constructing - // @output mkl_node - the new node that will feed Mkl tensor - // @output mkl_node_output_slot - the slot number of mkl_node that - // will feed the tensor - // @return None - void GetNodeProducingMklTensor(std::unique_ptr* g, Node* orig_node, - Node* n, int n_output_slot, Node** mkl_node, - int* mkl_node_output_slot); - - // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb' - // in graph 'g'. Original node is input in 'old_node'. Inputs to 'nb' are - // set up in contiguous fashion. 'workspace_tensors' carry graph nodes - // producing workspace edges if 'are_workspace_tensors_available' is true. - // Otherwise, 'workspace_tensors' is empty vector. 
- // - // For details, refer to 'Ordering of inputs after rewriting' section in the - // documentation above. - // - // Returns Status::OK() if setting up inputs is successful, otherwise - // returns appropriate status code. - int SetUpContiguousInputs( - std::unique_ptr* g, - const gtl::InlinedVector, 4>& old_node_inputs, - NodeBuilder* nb, Node* old_node, - std::vector* workspace_tensors, - bool are_workspace_tensors_available); - - // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb' - // in graph 'g'. Original node is input in 'orig_node'. - // - // For details, refer to 'Ordering of Tensorflow tensors and Mkl tensors' - // section in the documentation above. - // - // Returns Status::OK() if setting up inputs is successful, otherwise - // returns appropriate status code. - Status SetUpInputs(std::unique_ptr* g, - const gtl::InlinedVector, 4>& inputs, - NodeBuilder* nb, Node* orig_node); - - // Add workspace edge on the input or output side of Node 'orig_node' by using - // NodeBuilder 'nb' for the new node provided. If 'orig_node' does not dictate - // adding workspace edge then do not add it. Workspace Tensorflow and Mkl - // tensors, if they need to be added, will be set into these tensors. - // If we set workspace tensors, then are_ws_tensors_added should be true. - void AddWorkSpaceEdgeIfNeeded(std::unique_ptr* g, Node* orig_node, - NodeBuilder* nb, - std::vector* ws_tensors, - bool* are_ws_tensors_added); - - // Functions specific to operators to copy attributes - // We need operator-specific function to copy attributes because the framework - // does not provide any generic function for it. - // NOTE: names are alphabetically sorted. 
- static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsIdentity(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb); - - // Generate a graph node in graph 'g' representing a dummy Mkl tensor node, - // using node for original node 'orig_node' and return it in '*out'. - // TODO(nhasabni) We should move this to mkl_util.h - void GetDummyMklTensorNode(std::unique_ptr* g, Node** out, - Node* orig_node); - void GetDummyWorkspaceTensorNode(std::unique_ptr* g, Node** out, - Node* orig_node); -}; - -MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_; -MklLayoutRewritePass::ContextInfo - MklLayoutRewritePass::biasaddgrad_conv2dwithbias_context_; -MklLayoutRewritePass::ContextInfo - MklLayoutRewritePass::biasaddgrad_matmul_context_; -std::vector MklLayoutRewritePass::cinfo_; - -// We register Mkl rewrite pass for phase 1 in post partitioning group. -// We register it here so that we get a complete picture of all users of Mkl -// nodes. Do not change the ordering of the Mkl passes. 
-const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup = - OptimizationPassRegistry::POST_PARTITIONING; -#ifdef ENABLE_MKL -REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass); -#endif // ENABLE_MKL - -////////////////////////////////////////////////////////////////////////// -// Helper functions for creating new node -////////////////////////////////////////////////////////////////////////// - -static void FillInputs(const Node* n, - gtl::InlinedVector* control_edges, - gtl::InlinedVector, 4>* in) { - control_edges->clear(); - for (const Edge* e : n->in_edges()) { - if (e->IsControlEdge()) { - control_edges->push_back(e->src()); - } else { - (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output()); - } - } - std::sort(control_edges->begin(), control_edges->end()); - if (n->op_def().is_commutative()) { - // For commutative inputs, we sort the input by the input Node* - // to get a canonical ordering (so that add(a,b) and add(b, a) will - // hash to the same value if is_commutative is true for 'add'). - std::sort(in->begin(), in->end()); - } -} - -void MklLayoutRewritePass::GetNodesProducingTFTensorList( - const gtl::InlinedVector, 4>& inputs, int* input_idx, - int list_length, std::vector* output_nodes) { - CHECK_LT(*input_idx, inputs.size()); - CHECK_GT(list_length, 0); - CHECK_NOTNULL(output_nodes); - output_nodes->reserve(list_length); - - while (list_length != 0) { - CHECK_GT(list_length, 0); - CHECK_LT(*input_idx, inputs.size()); - Node* n = inputs[*input_idx].first; - int slot = inputs[*input_idx].second; - // If input node 'n' is just producing a single tensor at - // output slot 'slot' then we just add that single node. - output_nodes->push_back(NodeBuilder::NodeOut(n, slot)); - (*input_idx)++; - list_length--; - } -} - -// TODO(nhasabni) We should move this to mkl_util.h. 
-void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr* g, - Node** out, Node* orig_node) { - // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent - // dummy Mkl tensor. 8 = 2*size_t. - const DataType dt = DataTypeToEnum::v(); - TensorProto proto; - proto.set_dtype(dt); - uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - proto.set_tensor_content(string(reinterpret_cast(zero), 8)); - TensorShape dummy_shape({8}); - dummy_shape.AsProto(proto.mutable_tensor_shape()); - TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") - .Attr("value", proto) - .Attr("dtype", dt) - .Device(orig_node->def().device()) // We place this node on - // the same device as the - // device of the original - // node. - .Finalize(&**g, out)); - CHECK_NOTNULL(*out); // Make sure we got a valid object before using it - - // If number of inputs to the original node is > 0, then we add - // control dependency between 1st input (index 0) of the original node and - // the dummy Mkl node. This is needed because control-flow ops such as Enter, - // Merge, etc, require frame_name of the dummy Mkl node to be same as the - // rewritten node. Adding control edge between 1st input of the original node - // and the dummy Mkl node ensures that the dummy node is in the same frame - // as the original node. Choosing 1st input is not necessary - any input of - // the original node is fine because all the inputs of a node are always in - // the same frame. 
- if (orig_node->num_inputs() > 0) { - Node* orig_input0 = nullptr; - TF_CHECK_OK( - orig_node->input_node(0, const_cast(&orig_input0))); - CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out)); - } - - (*out)->set_assigned_device_name(orig_node->assigned_device_name()); -} - -void MklLayoutRewritePass::GetNodesProducingMklTensorList( - std::unique_ptr* g, Node* orig_node, - const gtl::InlinedVector, 4>& inputs, int* input_idx, - int list_length, std::vector* output_nodes) { - CHECK_LT(*input_idx, inputs.size()); - CHECK_GT(list_length, 0); - CHECK_NOTNULL(output_nodes); - output_nodes->reserve(list_length); - - while (list_length != 0) { - CHECK_GT(list_length, 0); - CHECK_LT(*input_idx, inputs.size()); - Node* n = inputs[*input_idx].first; - int slot = inputs[*input_idx].second; - // If 'n' is producing a single tensor, then create a single Mkl tensor - // node. - Node* mkl_node = nullptr; - int mkl_node_output_slot = 0; - GetNodeProducingMklTensor(g, orig_node, n, slot, &mkl_node, - &mkl_node_output_slot); - output_nodes->push_back( - NodeBuilder::NodeOut(mkl_node, mkl_node_output_slot)); - (*input_idx)++; - list_length--; - } -} - -// Get an input node that will feed Mkl tensor to the new -// node that we are constructing. An input node could be (1) 'n' -// if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor -// if 'n' is not an Mkl layer. -void MklLayoutRewritePass::GetNodeProducingMklTensor( - std::unique_ptr* g, Node* orig_node, Node* n, int n_output_slot, - Node** mkl_node, int* mkl_node_output_slot) { - CHECK_NOTNULL(n); - CHECK_NOTNULL(mkl_node); - CHECK_NOTNULL(mkl_node_output_slot); - - // If this is an MKL op, then it will create extra output for MKL layout. - DataType T; - if (GetNodeAttr(n->def(), "T", &T).ok() && - mkl_op_registry::IsMklOp(n->type_string(), T)) { - // If this is an MKL op, then it will generate an edge that will receive - // Mkl tensor from a node. 
- // output slot number for Mkl tensor would be N+slot number of TensorFlow - // tensor, where N is total number of TensorFlow tensors. - *mkl_node = n; - *mkl_node_output_slot = - GetTensorMetaDataIndex(n_output_slot, n->num_outputs()); - } else { - // If we have not visited the node and rewritten it, then we need - // to create a dummy node that will feed a dummy Mkl tensor to this node. - // DummyMklTensor node has no input and generates only 1 output - // (dummy Mkl tensor) as output slot number 0. - GetDummyMklTensorNode(g, mkl_node, orig_node); - CHECK_NOTNULL(*mkl_node); - *mkl_node_output_slot = 0; - } -} - -int MklLayoutRewritePass::SetUpContiguousInputs( - std::unique_ptr* g, - const gtl::InlinedVector, 4>& old_node_inputs, - NodeBuilder* nb, Node* old_node, - std::vector* workspace_tensors, - bool are_workspace_tensors_available) { - CHECK_NOTNULL(workspace_tensors); - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - - // TODO(nhasabni): Temporary solution to connect filter input of - // BackpropInput with the converted filter from Conv2D. - bool do_connect_conv2d_backprop_input_filter = false; - Node* conv2d_node = nullptr; - // Filter node is 2nd input (slot index 1) of Conv2D. - int kConv2DFilterInputSlotIdx = 1; - int kConv2DBackpropInputFilterInputSlotIdx = 1; - int kConv2DFilterOutputSlotIdx = 1; - if (old_node->type_string() == csinfo_.conv2d_grad_input) { - // We need to find Conv2D node from Conv2DBackpropInput. - // For that let's first find filter node that is 2nd input (slot 1) - // of BackpropInput. - Node* filter_node = nullptr; - TF_CHECK_OK(old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, - &filter_node)); - CHECK_NOTNULL(filter_node); - - // Now check which nodes receive from filter_node. Filter feeds as - // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias. 
- for (const Edge* e : filter_node->out_edges()) { - if (e->dst()->type_string() == csinfo_.mkl_conv2d && - e->dst_input() == kConv2DFilterInputSlotIdx - /* filter is 2nd input of Conv2D and _MklConv2D. */) { - if (conv2d_node != nullptr) { - VLOG(1) << "MklLayoutRewritePass: unusual case of same filter" - << " feeding multiple Conv2D nodes: " - << filter_node->DebugString(); - // We will not connect filter input of Conv2DBackpropInput - // to be safe here. - do_connect_conv2d_backprop_input_filter = false; - break; - } else { - conv2d_node = e->dst(); - do_connect_conv2d_backprop_input_filter = true; - } - } - } - } - - // Number of input slots to original op - // Input slots are represented by .Input() calls in REGISTER_OP. - int old_node_input_slots = old_node->op_def().input_arg_size(); - // Actual number of inputs can be greater than or equal to number - // of Input slots because inputs of type list could be unfolded. - CHECK_GE(old_node_inputs.size(), old_node_input_slots); - int nn_slot_idx = 0; // slot index for inputs of new node - - // Let's copy all inputs (TF tensors) of original node to new node. - int iidx = 0; - for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) { - // An input slot could be a single tensor or a list. We need - // to handle this case accordingly. 
- CHECK_LT(iidx, old_node_inputs.size()); - const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx); - if (ArgIsList(arg)) { - std::vector new_node_inputs; - int N = GetTensorListLength(arg, old_node); - GetNodesProducingTFTensorList(old_node_inputs, &iidx, N, - &new_node_inputs); - nb->Input(new_node_inputs); - nn_slot_idx++; - } else { - // Special case for connecting filter input of Conv2DBackpropInput - if (do_connect_conv2d_backprop_input_filter && - iidx == kConv2DBackpropInputFilterInputSlotIdx) { - nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx); - } else { - nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second); - } - iidx++; - nn_slot_idx++; - } - } - - // If workspace tensors are available for this op and we are using - // contiguous ordering then we need to add Tensorflow tensor for - // workspace here because Tensorflow tensor for workspace is the - // last tensor in the list of Tensorflow tensors. - if (are_workspace_tensors_available) { - CHECK_EQ(workspace_tensors->size(), 2); - // Tensorflow tensor - nb->Input((*workspace_tensors)[0].node, (*workspace_tensors)[0].index); - nn_slot_idx++; - } - - // Let's now setup all Mkl inputs to new node. - // Number of Mkl inputs must be same as number of TF inputs. - iidx = 0; - for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) { - // An input slot could be a single tensor or a list. We need - // to handle this case accordingly. 
- CHECK_LT(iidx, old_node_inputs.size()); - const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx); - if (ArgIsList(arg)) { - std::vector new_node_inputs; - int N = GetTensorListLength(arg, old_node); - GetNodesProducingMklTensorList(g, old_node, old_node_inputs, &iidx, N, - &new_node_inputs); - nb->Input(new_node_inputs); - nn_slot_idx++; - } else { - Node* mkl_node = nullptr; - int mkl_node_output_slot = 0; - // Special case for connecting filter input of Conv2DBackpropInput - if (do_connect_conv2d_backprop_input_filter && - iidx == kConv2DBackpropInputFilterInputSlotIdx) { - GetNodeProducingMklTensor(g, old_node, conv2d_node, - kConv2DFilterOutputSlotIdx, &mkl_node, - &mkl_node_output_slot); - } else { - GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first, - old_node_inputs[iidx].second, &mkl_node, - &mkl_node_output_slot); - } - nb->Input(mkl_node, mkl_node_output_slot); - iidx++; - nn_slot_idx++; - } - } - - // If workspace tensors are available for this op and we are using - // contiguous ordering then we need to add Mkl tensor for - // workspace here because Mkl tensor for workspace is the - // last tensor in the list of Mkl tensors. - if (are_workspace_tensors_available) { - CHECK_EQ(workspace_tensors->size(), 2); - // Mkl tensor - nb->Input((*workspace_tensors)[1].node, (*workspace_tensors)[1].index); - nn_slot_idx++; - } - - return nn_slot_idx; -} - -Status MklLayoutRewritePass::SetUpInputs( - std::unique_ptr* g, - const gtl::InlinedVector, 4>& old_node_inputs, - NodeBuilder* nb, Node* old_node) { - // Let's check if we need to add workspace tensors for this node. - // We add workspace edge only for MaxPool, LRN and BatchNorm. 
- std::vector workspace_tensors; - bool are_workspace_tensors_available = false; - AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors, - &are_workspace_tensors_available); - - int new_node_input_slots = 0; - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - // TODO(nhasabni): implement this function just for same of completion. - // We do not use interleaved ordering right now. - return Status( - error::Code::UNIMPLEMENTED, - "Interleaved ordering of tensors is currently not supported."); - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - new_node_input_slots = SetUpContiguousInputs( - g, old_node_inputs, nb, old_node, &workspace_tensors, - are_workspace_tensors_available); - } - - // Sanity check - int old_node_input_slots = old_node->op_def().input_arg_size(); - if (!are_workspace_tensors_available) { - // If we are not adding workspace tensors for this op, then the total - // number of input slots to the new node _must_ be 2 times the number - // of input slots to the original node: N original Tensorflow tensors and - // N for Mkl tensors corresponding to each Tensorflow tensors. - CHECK_EQ(new_node_input_slots, old_node_input_slots * 2); - } else { - // If we are adding workspace tensors for this op, then the total - // The total number of input slots to new node _must_ be 2 times the number - // of input slots to the original node: N original Tensorflow tensors and - // N for Mkl tensors corresponding to each Tensorflow tensors plus 2 - // (for workspace Tensorflow tensor and workspace Mkl tensor). - CHECK_EQ(new_node_input_slots, old_node_input_slots * 2 + 2); - } - - return Status::OK(); -} - -////////////////////////////////////////////////////////////////////////// -// Helper functions related to workspace pass -////////////////////////////////////////////////////////////////////////// - -// TODO(nhasabni) We should move this to mkl_util.h. 
-void MklLayoutRewritePass::GetDummyWorkspaceTensorNode( - std::unique_ptr* g, Node** out, Node* orig_node) { - // We use a tensor of shape {1} and value 0 to represent - // dummy float tensor. We need this as a dummy workspace tensor. - // Workspace tensor has type float. - const DataType dt = DataTypeToEnum::v(); - TensorProto proto; - proto.set_dtype(dt); - float zero[1] = {0}; - proto.set_tensor_content(string(reinterpret_cast(&zero), 4)); - TensorShape dummy_shape({1}); - dummy_shape.AsProto(proto.mutable_tensor_shape()); - TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") - .Attr("value", proto) - .Attr("dtype", dt) - .Device(orig_node->def().device()) // We place this node on - // same the device as the - // device of the original - // node. - .Finalize(&**g, out)); - CHECK_NOTNULL(*out); // Make sure we got a valid object before using it - - // If number of inputs to the original node is > 0, then we add - // control dependency between 1st input (index 0) of the original node and - // the dummy Mkl node. This is needed because control-flow ops such as Enter, - // Merge, etc, require frame_name of the dummy Mkl node to be same as the - // rewritten node. Adding control edge between 1st input of the original node - // and the dummy Mkl node ensures that the dummy node is in the same frame - // as the original node. Choosing 1st input is not necessary - any input of - // the original node is fine because all the inputs of a node are always in - // the same frame. 
- if (orig_node->num_inputs() > 0) { - Node* orig_input0 = nullptr; - TF_CHECK_OK( - orig_node->input_node(0, const_cast(&orig_input0))); - CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out)); - } - - (*out)->set_assigned_device_name(orig_node->assigned_device_name()); -} - -void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded( - std::unique_ptr* g, Node* orig_node, NodeBuilder* nb, - std::vector* ws_tensors, bool* are_ws_tensors_added) { - bool workspace_edge_added = false; // Default initializer - CHECK_NOTNULL(are_ws_tensors_added); - *are_ws_tensors_added = false; // Default initializer - - DataType T; - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - for (auto ws : wsinfo_) { - if (orig_node->type_string() == ws.fwd_op && - mkl_op_registry::IsMklOp( - mkl_op_registry::GetMklOpName(orig_node->type_string()), T)) { - // If this op is a fwd op, then we need to check if there is an - // edge from this node's fwd_slot to bwdop's bwd_slot. If there is - // an edge, then we just add an attribute on this node for setting - // workspace_passed to true. We don't add actual workspace edge - // in this node. Actual workspace edge gets added in the backward - // op for this node. - for (const Edge* e : orig_node->out_edges()) { - if (e->src_output() == ws.fwd_slot && - e->dst()->type_string() == ws.bwd_op && - e->dst_input() == ws.bwd_slot) { - nb->Attr("workspace_enabled", true); - VLOG(1) << "MklLayoutRewritePass: workspace_enabled for " - << orig_node->type_string(); - workspace_edge_added = true; - // We found the edge that we were looking for, so break. - break; - } - } - - if (!workspace_edge_added) { - // If we are here, then we did not find backward operator for this - // node. 
- nb->Attr("workspace_enabled", false); - } - } else if (orig_node->type_string() == ws.bwd_op && - mkl_op_registry::IsMklOp( - mkl_op_registry::GetMklOpName(orig_node->type_string()), - T)) { - // If this op is a bwd op, then we need to add workspace edge and - // it's Mkl tensor edge between its corresponding fwd op and this - // op. Corresponding fwd op is specified in 'fwd_op' field of - // workspace info. fwd_slot and bwd_slot in workspace info specify - // an edge between which slots connect forward and backward op. - // Once all these criteria match, we add a workspace edge between - // ws_fwd_slot and ws_bwd_slot. Its corresponding Mkl tensor is - // determined by interleaved/contiguous ordering. Function - // DataIndexToMetaDataIndex tells us the location of Mkl tensor - // from the location of the Tensorflow tensor. - for (const Edge* e : orig_node->in_edges()) { - if (e->src_output() == ws.fwd_slot && - // We would have rewritten the forward op, so we need to use - // GetMklOpName call to get its Mkl name. - e->src()->type_string() == - mkl_op_registry::GetMklOpName(ws.fwd_op) && - e->dst_input() == ws.bwd_slot) { - nb->Attr("workspace_enabled", true); - CHECK_NOTNULL(ws_tensors); - // Add workspace edge between fwd op and bwd op. - ws_tensors->push_back(NodeBuilder::NodeOut(e->src(), ws.ws_fwd_slot)); - // Add Mkl tensor edge for workspace edge between fwd op and bwd op. - ws_tensors->push_back(NodeBuilder::NodeOut( - e->src(), DataIndexToMetaDataIndex(ws.ws_fwd_slot, - e->src()->num_outputs()))); - *are_ws_tensors_added = true; - // In terms of input ordering, we add these calls to add Input - // here because workspace edge (and its Mkl tensor) is the last - // edge in the fwdop and bwdop. So all inputs before workspace - // tensor have been added by SetUpInputs function. - VLOG(1) << "MklLayoutRewritePass: workspace_enabled for " - << orig_node->type_string(); - workspace_edge_added = true; - // We found the edge that we were looking for, so break. 
- break; - } - } - - // If we are here means we did not find fwd op that feeds to this - // bwd op. So in this case, we need to generate dummy tensors for - // workspace input and Mkl tensor for workspace, and set - // workspace_enabled to false. - if (!workspace_edge_added) { - nb->Attr("workspace_enabled", false); - Node* dmt_ws = nullptr; // Dummy tensor for workspace - Node* dmt_mkl_ws = nullptr; // Dummy Mkl tensor for workspace - GetDummyWorkspaceTensorNode(g, &dmt_ws, orig_node); - GetDummyMklTensorNode(g, &dmt_mkl_ws, orig_node); - CHECK_NOTNULL(dmt_ws); - CHECK_NOTNULL(dmt_mkl_ws); - CHECK_NOTNULL(ws_tensors); - // We add dummy tensor as workspace tensor. - ws_tensors->push_back(NodeBuilder::NodeOut(dmt_ws, 0)); - // We add dummy tensor as Mkl tensor for workspace tensor. - ws_tensors->push_back(NodeBuilder::NodeOut(dmt_mkl_ws, 0)); - *are_ws_tensors_added = true; - VLOG(1) << "MklLayoutRewritePass: dummy workspace_enabled for " - << orig_node->type_string(); - } - } else { - // If this node does not match any workspace info, then we do not - // do anything special for workspace propagation for it. - } - } -} - -////////////////////////////////////////////////////////////////////////// -// Op-specific functions to copy attributes from old node to new node -////////////////////////////////////////////////////////////////////////// - -void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - string data_format; - string padding; - std::vector strides; - bool use_cudnn_on_gpu; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); - TF_CHECK_OK( - GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu)); - - // Add attributes to new node. 
- nb->Attr("T", T); - nb->Attr("strides", strides); - nb->Attr("padding", padding); - nb->Attr("data_format", data_format); - nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); -} - -void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - int N; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N)); - - // Add attributes to new node. - nb->Attr("T", T); - nb->Attr("N", N); -} - -void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - string data_format; - std::vector strides; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); - - // Add attributes to new node. - nb->Attr("T", T); - nb->Attr("strides", strides); - nb->Attr("data_format", data_format); -} - -void MklLayoutRewritePass::CopyAttrsIdentity(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - // Add attributes to new node. - nb->Attr("T", T); -} - -void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - int depth_radius; - float bias; - float alpha; - float beta; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "depth_radius", &depth_radius)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "bias", &bias)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "beta", &beta)); - - // Add attributes to new node. 
- nb->Attr("T", T); - nb->Attr("depth_radius", depth_radius); - nb->Attr("bias", bias); - nb->Attr("alpha", alpha); - nb->Attr("beta", beta); -} - -void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - string data_format; - string padding; - std::vector ksize, strides; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); - - // Add attributes to new node. - nb->Attr("T", T); - nb->Attr("ksize", ksize); - nb->Attr("strides", strides); - nb->Attr("padding", padding); - nb->Attr("data_format", data_format); -} - -void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - - // Add attributes to new node. - nb->Attr("T", T); -} - -void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - DataType Tshape; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tshape", &Tshape)); - // Add attributes to new node. - nb->Attr("T", T); - nb->Attr("Tshape", Tshape); -} - -void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - string data_format; - int num_split; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_split", &num_split)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); - - // Add attributes to new node. 
- nb->Attr("T", T); - nb->Attr("num_split", num_split); - nb->Attr("data_format", data_format); -} - -void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - int N; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N)); - - // Add attributes to new node. - nb->Attr("T", T); - nb->Attr("N", N); -} - -void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - int N; - DataType tidx; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tidx", &tidx)); - - // Add attributes to new node. - nb->Attr("T", T); - nb->Attr("N", N); - nb->Attr("Tidx", tidx); -} - -void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - float epsilon; - string data_format; - bool is_training; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "is_training", &is_training)); - - // Add attributes to new node. - nb->Attr("T", T); - nb->Attr("epsilon", epsilon); - nb->Attr("data_format", data_format); - nb->Attr("is_training", is_training); -} - -////////////////////////////////////////////////////////////////////////// -// Helper functions related to node merge pass -////////////////////////////////////////////////////////////////////////// - -Node* MklLayoutRewritePass::CheckForNodeMerge(const Node* a) const { - // TODO(nhasabni) Add check for type of node similar to CheckForNodeRewrite - // once we support BiasAddGrad as Mkl layer. - - // Search for all matching mergeinfo. 
- // We allow more than one match for extensibility. - std::vector matching_mi; - for (auto mi = minfo_.cbegin(); mi != minfo_.cend(); ++mi) { - if (a->type_string() == mi->succ) { - matching_mi.push_back(&*mi); - } - } - - for (const MergeInfo* mi : matching_mi) { - const int N_in = a->num_inputs(); - if (mi->op >= N_in) { - continue; - } - - // Get the control edges and input of node - gtl::InlinedVector a_control_edges; - gtl::InlinedVector, 4> a_in(N_in); - FillInputs(a, &a_control_edges, &a_in); - - // Get operand op of the operator - Node* b = nullptr; - b = a_in[mi->op].first; - if (b == nullptr || (b->type_string() != mi->pred)) { - // NOTE: Should the first check be assert? - continue; - } - - const int B_in = b->num_inputs(); - gtl::InlinedVector b_control_edges; - gtl::InlinedVector, 4> b_in(B_in); - FillInputs(b, &b_control_edges, &b_in); - - // Shouldn't merge if a and b have different control edges. - if (a_control_edges != b_control_edges) { - continue; - } else { - // We found a match. - return b; - } - } - - return nullptr; -} - -Status MklLayoutRewritePass::MergeNode(std::unique_ptr* g, Node* succ, - Node* pred) { - CHECK_NOTNULL(succ); - CHECK_NOTNULL(pred); - - if (succ->type_string() == csinfo_.bias_add && - pred->type_string() == csinfo_.mkl_conv2d) { - // 1. Get all attributes from input nodes. 
- DataType T_pred, T_succ; - string padding; - std::vector strides; - string data_format_pred, data_format_succ; - bool use_cudnn_on_gnu; - TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred)); - TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ)); - TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding)); - TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides)); - TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred)); - TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ)); - TF_CHECK_OK( - GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu)); - // We check to ensure that data formats of both succ and pred are same. - // We expect them to be same, so we can enforce this as assert. - // But assert can be too strict, so we enforce this as a check. - // If the check fails, then we do not merge two nodes. - // We also do same check for devices. - if (data_format_pred != data_format_succ || T_pred != T_succ || - pred->assigned_device_name() != succ->assigned_device_name() || - pred->def().device() != succ->def().device()) { - return Status(error::Code::INVALID_ARGUMENT, - "data_format or T attribute or devices of Conv2D and " - "BiasAdd do not match. Will skip node merge optimization"); - } - - const int succ_num = succ->num_inputs(); - gtl::InlinedVector succ_control_edges; - gtl::InlinedVector, 4> succ_in(succ_num); - FillInputs(succ, &succ_control_edges, &succ_in); - - const int pred_num = pred->num_inputs(); - gtl::InlinedVector pred_control_edges; - gtl::InlinedVector, 4> pred_in(pred_num); - FillInputs(pred, &pred_control_edges, &pred_in); - - // We need to ensure that there is only 1 edge between Conv2D and AddBias. - // Otherwise, merging is semantically incorrect. - if (pred->out_edges().size() != 1) { - return Status(error::Code::INVALID_ARGUMENT, - "Conv2D has multiple outputs." 
- "Will skip node merge optimization"); - } - - for (const Edge* e : pred->out_edges()) { - if (e->dst() != succ) { - return Status(error::Code::INVALID_ARGUMENT, - "Conv2D does not feed to BiasAdd." - "Will skip node merge optimization"); - } - } - - // 2. Get inputs from both the nodes. - // Find the 2 inputs from the conv and the bias from the add Bias. - // Get operand 0, 1 of conv2D and their Mkl tensors. - CHECK_EQ(pred->in_edges().size(), 4); // _MklConv2D must have 4 inputs. - // Get operand 1 of add_bias - // BiasAdd must have 2 inputs: Conv, bias - CHECK_EQ(succ->in_edges().size(), 2); - Node* oper3_mkl = nullptr; // Mkl tensor corresponding to oper3 - int oper3_mkl_slot = 0; // For dummy MKL tensor node, output slot is 0. - GetDummyMklTensorNode(g, &oper3_mkl, pred); // Get dummy Mkl tensor node - // as BiasAdd does not have Mkl tensor as input. - CHECK_NOTNULL(oper3_mkl); - - // We will use the node name of BiasAdd as the name of new node - // Build new node. We use same name as original node, but change the op - // name. - NodeBuilder nb(succ->name(), csinfo_.mkl_conv2d_with_bias); - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - nb.Input(pred_in[0].first, pred_in[0].second); // In1 of Conv2D - // pred_in[1] will be Mkl tensor for In1 if we follow interleaved - // ordering, and it will be 2nd Tensorflow tensor for Conv2D if - // we follow contiguous ordering. 
- nb.Input(pred_in[1].first, pred_in[1].second); // Mkl for In1 - nb.Input(pred_in[2].first, pred_in[2].second); // In2 of Conv2D - nb.Input(pred_in[3].first, pred_in[3].second); // Mkl for In2 - nb.Input(succ_in[1].first, succ_in[1].second); // In2 of BiasAdd - nb.Input(oper3_mkl, oper3_mkl_slot); // Mkl for In2 of BiasAdd - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - nb.Input(pred_in[0].first, pred_in[0].second); // In1 of Conv2D - // pred_in[1] will be Mkl tensor for In1 if we follow interleaved - // ordering, and it will be 2nd Tensorflow tensor for Conv2D if - // we follow contiguous ordering. - nb.Input(pred_in[1].first, pred_in[1].second); // In2 of Conv2D - nb.Input(succ_in[1].first, succ_in[1].second); // In2 of BiasAdd - nb.Input(pred_in[2].first, pred_in[2].second); // Mkl for In1 of Conv2D - nb.Input(pred_in[3].first, pred_in[3].second); // Mkl for In2 of Conv2D - nb.Input(oper3_mkl, oper3_mkl_slot); // Mkl for In2 of BiasAdd - } - - // Copy attributes from Conv2D to Conv2DWithBias. - CopyAttrsConv2D(const_cast(pred), &nb); - - // Copy the device assigned to old node to new node. - nb.Device(succ->def().device()); - - // Create node. - Node* new_node; - TF_CHECK_OK(nb.Finalize(&**g, &new_node)); - CHECK_NOTNULL(new_node); - - // Set the Mkl layer label for this op. - new_node->AddAttr("_kernel", mkl_op_registry::kMklOpLabel); - - // Incoming data edges from 'pred' node and 'succ' node to new 'new_node' - // node are already copied in BuildNode. We handle control edges now. - for (const Edge* e : pred->in_edges()) { - if (e->IsControlEdge()) { - CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node)); - } - } - for (const Edge* e : succ->in_edges()) { - if (e->IsControlEdge()) { - CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node)); - } - } - - // Incoming edges are fixed, we will fix the outgoing edges now. - // First, we will fix outgoing control edges from 'pred' node. 
- // We don't need to handle outgoing data edges from 'pred' node - // because pred has only 1 output going to succ node (we enforced - // this check for merge already). - for (const Edge* e : pred->out_edges()) { - if (e->IsControlEdge()) { - CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst())); - } - } - - // Second, we will fix outgoing control and data edges from 'succ' node. - for (const Edge* e : succ->out_edges()) { - if (e->IsControlEdge()) { - CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst())); - } else { - CHECK_NOTNULL( - (*g)->AddEdge(new_node, e->src_output(), e->dst(), e->dst_input())); - } - } - - // Copy device assigned to old node to new node. - // It's ok to use pred or succ as we have enforced a check that - // both have same device assigned. - new_node->set_assigned_device_name(pred->assigned_device_name()); - - VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString() - << ", and node: " << succ->DebugString() - << ", into node:" << new_node->DebugString(); - - (*g)->RemoveNode(succ); - (*g)->RemoveNode(pred); - - return Status::OK(); - } - - return Status(error::Code::UNIMPLEMENTED, - "Unimplemented case for node merge optimization."); -} - -////////////////////////////////////////////////////////////////////////// -// Helper functions for node rewrite -////////////////////////////////////////////////////////////////////////// - -Status MklLayoutRewritePass::RewriteNode(std::unique_ptr* g, - Node* orig_node, - const RewriteInfo* ri) { - CHECK_NOTNULL(ri); - CHECK_NOTNULL(orig_node); - - VLOG(1) << "MklLayoutRewritePass: Original node:" << orig_node->DebugString(); - - // Check if this is scenario 2 (context-based rewrite). - // Get the matching ContextInfo if it is. 
- const Node* fwd_node = nullptr; - const ContextInfo* ci = nullptr; - bool is_context_based_rewrite = false; - if ((ci = SearchMatchingContext(orig_node, &fwd_node)) != nullptr) { - is_context_based_rewrite = true; - - // Sanity checks for context-based rewrite (if any) - if (orig_node->type_string() == csinfo_.bias_add_grad && - ri->new_name == csinfo_.mkl_conv2d_with_bias_backprop_bias) { - CHECK_NOTNULL(fwd_node); - DataType orig_T, ctx_T; - string orig_data_format, ctx_data_format; - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &orig_T)); - TF_CHECK_OK( - GetNodeAttr(orig_node->def(), "data_format", &orig_data_format)); - TF_CHECK_OK(GetNodeAttr(fwd_node->def(), "T", &ctx_T)); - TF_CHECK_OK( - GetNodeAttr(fwd_node->def(), "data_format", &ctx_data_format)); - - if (orig_data_format != ctx_data_format || orig_T != ctx_T || - orig_node->assigned_device_name() != - fwd_node->assigned_device_name() || - orig_node->def().device() != fwd_node->def().device()) { - return Status( - error::Code::INVALID_ARGUMENT, - "data_format or T attribute or devices of BiasAddGrad and " - "Conv2D do not match. Will skip node rewrite optimization"); - } - } else if (orig_node->type_string() == csinfo_.bias_add_grad && - ri->new_name == csinfo_.matmul) { - // When BiasAddGrad has MatMul in context, we do not do any rewrite - // and leave BiasAddGrad as it is. But we check for this condition - // when we check for node rewrite rule. So we should not even come - // here for MatMul. So we will fail now. - return Status( - error::Code::INVALID_ARGUMENT, - "No rewrite is required for BiasAddGrad for MatMul context."); - } - } - - // Get all inputs. 
- int num_inputs = orig_node->in_edges().size(); - - // Drop count for control edges from inputs - for (const Edge* e : orig_node->in_edges()) { - if (e->IsControlEdge()) { - num_inputs--; - } - } - - gtl::InlinedVector control_edges; - gtl::InlinedVector, 4> inputs(num_inputs); - FillInputs(orig_node, &control_edges, &inputs); - - // Build new node. We use same name as original node, but change the op name. - NodeBuilder nb(orig_node->name().c_str(), ri->new_name.c_str()); - // Copy user-specified device assigned to original node to new node. - nb.Device(orig_node->def().device()); - // Set up new inputs to the rewritten node. - Status s = SetUpInputs(g, inputs, &nb, orig_node); - if (s != Status::OK()) { - return s; - } - - // Copy attributes from original node to new node (for scenario 1). - // For context-based rewrite, we use context to copy the attributes. - if (is_context_based_rewrite) { - if (orig_node->type_string() == csinfo_.bias_add_grad && - ri->new_name == csinfo_.mkl_conv2d_with_bias_backprop_bias) { - CHECK_NOTNULL(fwd_node); - ri->copy_attrs(fwd_node, &nb); - } else { - return Status(error::Code::UNIMPLEMENTED, - "Unimplemented case for node rewrite optimization."); - } - } else { - ri->copy_attrs(const_cast(orig_node), &nb); - } - // Set the Mkl layer label for this op. - nb.Attr("_kernel", mkl_op_registry::kMklOpLabel); - - // Finalize graph and get new node. - Node* new_node = nullptr; - TF_CHECK_OK(nb.Finalize(&**g, &new_node)); - CHECK_NOTNULL(new_node); - - // Incoming data edges from 'orig_node' node to new 'new_node' node are - // already copied in BuildNode. We need to handle control edges now. - for (const Edge* e : orig_node->in_edges()) { - if (e->IsControlEdge()) { - CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node)); - } - } - - // Copy outgoing edges from 'orig_node' node to new - // 'new_node' node, since the output also follows same ordering among - // Tensorflow tensors and Mkl tensors. 
We need to connect Tensorflow - // tensors appropriately. Specifically, nth output of the original node - // will become 2*nth output of the Mkl node for the interleaved ordering - // of the tensors. For the contiguous ordering of the tensors, it will be n. - // GetTensorDataIndex provides this mapping function. - for (const Edge* e : orig_node->out_edges()) { - if (e->IsControlEdge()) { - CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst())); - } else { - CHECK_NOTNULL((*g)->AddEdge( - new_node, - GetTensorDataIndex(e->src_output(), e->src()->num_outputs()), - e->dst(), e->dst_input())); - } - } - - // Copy the runtime device assigned from original code to new node. - new_node->set_assigned_device_name(orig_node->assigned_device_name()); - - // Delete original node and mark new node as rewritten. - (*g)->RemoveNode(orig_node); - - VLOG(1) << "MklLayoutRewritePass: New node:" << new_node->DebugString(); - return Status::OK(); -} - -const MklLayoutRewritePass::ContextInfo* -MklLayoutRewritePass::SearchMatchingContext(const Node* n, - const Node** fwd_node) { - CHECK_NOTNULL(n); - CHECK_NOTNULL(fwd_node); - *fwd_node = nullptr; - - // Search for matching contextinfo based on node name and call - // callback function using matching contextinfo. - // There could be more than one matching contextinfos but whichever - // matches first is returned. 
- for (auto ci = cinfo_.cbegin(); ci != cinfo_.cend(); ++ci) { - if (n->type_string() == (*ci)->node && - (*ci)->context_match_fn(n, fwd_node, *ci)) { - VLOG(1) << "Found context as matching: " << (*ci)->fwd; - return *ci; - } - } - return nullptr; -} - -bool MklLayoutRewritePass::ContextMatchRewrite(const Node* n, - const ContextInfo* c) { - const Node* fwd_node = nullptr; - return SearchMatchingContext(n, &fwd_node) == c; -} - -const MklLayoutRewritePass::RewriteInfo* -MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { - CHECK_NOTNULL(n); - - // First check if node along with its type is supported by MKL layer. - // We do not want to rewrite an op into Mkl op if types are not supported. - // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to - // MklRelu if type is INT32. - DataType T; - if (!GetNodeAttr(n->def(), "T", &T).ok()) { - return nullptr; - } - - // BiasAddGrad is not an Mkl layer, so we make an exception for it. - if (n->type_string() != csinfo_.bias_add_grad) { - if (!mkl_op_registry::IsMklOp( - mkl_op_registry::GetMklOpName(n->type_string()), T)) { - return nullptr; - } - } - - // For elementwise node, we reuse the Eigen implementation and pass the MKL - // metadata tensor through so we can avoid conversions. However, if all - // incoming edges are in TF format, we don't need all this overhead, so - // replace the elementwise node only if at least one of its parents is a MKL - // node. - // - // TODO(vrane): Add implementation for element-wise ops that doesn't reuse - // eigen code to reduce cross-library dependency. 
- if (mkl_op_registry::IsMklElementWiseOp( - mkl_op_registry::GetMklOpName(n->type_string()), T)) { - bool incoming_mkl_edge = false; - for (auto parent : n->in_edges()) { - if (mkl_op_registry::IsMklOp( - mkl_op_registry::GetMklOpName(parent->src()->type_string()), T)) { - incoming_mkl_edge = true; - break; - } else { - VLOG(1) << "Non-MKL parent is: " << parent->src()->type_string(); - } - } - if (incoming_mkl_edge == false) { - VLOG(1) << "Skipping replacement of elementwise node which has no MKL " - "parents."; - return nullptr; - } - } - - // We support 2 types of node rewrites: - // 1. Rewriting BiasAddGrad depending on its MklConv2DWithBias context. - // 2. Rewriting an op to Mkl op always - // We return true if any of these 2 conditions is met. - - // Find matching RewriteInfo and then check that rewrite rule applies. - for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) { - if (n->type_string().compare(ri->name) == 0 && - ri->rewrite_rule(n, ri->context)) { - // If we are rewriting BiasAddGrad into BiasAddGrad for MatMul context, - // then we just return directly. - if (n->type_string() == csinfo_.bias_add_grad && - ri->context->fwd == csinfo_.matmul && - ri->new_name == csinfo_.bias_add_grad) { - return nullptr; - } - return &*ri; - } - } - - // Else return not found. - return nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -// Run function for the pass -/////////////////////////////////////////////////////////////////////////////// - -bool MklLayoutRewritePass::RunPass(std::unique_ptr* g) { - bool result = false; - CHECK_NOTNULL(g); - - DumpGraph("Before running MklLayoutRewritePass", &**g); - - std::vector order; - GetReversePostOrder(**g, &order); // This will give us topological sort. - - for (Node* n : order) { - // If node is not an op or it cannot run on CPU device, then skip. 
- if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) { - continue; - } - - const RewriteInfo* ri = nullptr; - Node* predn = nullptr; - // We will first search if node is to be rewritten - if ((ri = CheckForNodeRewrite(n)) != nullptr) { - string node_name = n->name(); - string op_name = n->type_string(); - - VLOG(1) << "MklLayoutRewritePass: Scheduled node " << node_name - << " with op " << op_name << " for rewrite using" - << " layout optimization."; - - if (RewriteNode(g, n, ri) == Status::OK()) { - VLOG(1) << "MklLayoutRewritePass: rewrote node " << node_name - << " with op " << op_name << " for Mkl layout optimization."; - result = true; - } - } else if ((predn = CheckForNodeMerge(n)) != nullptr) { - // Otherwise, we will check if the node is to be merged. - string n1_name = n->name(); - string n2_name = predn->name(); - - VLOG(1) << "MklLayoutRewritePass: Scheduled nodes " << n1_name << " and " - << n2_name << " for merging"; - - if (MergeNode(g, n, predn) == Status::OK()) { - VLOG(1) << "MklLayoutRewritePass: Merged nodes " << n1_name << " and " - << n2_name; - result = true; - } - } - } - - DumpGraph("After running MklLayoutRewritePass", &**g); - - return result; -} - -bool RunMklLayoutRewritePass(std::unique_ptr* g) { - return MklLayoutRewritePass().RunPass(g); -} - -Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) { - if (options.graph == nullptr && options.partition_graphs == nullptr) { - return Status::OK(); - } - - auto process_graph = [&](std::unique_ptr* g) { - // Get the ownership of a graph - std::unique_ptr* ng = std::move(g); - RunPass(ng); - // Return the ownership of a graph back - g->reset(ng->release()); - }; - - if (kMklLayoutRewritePassGroup != - OptimizationPassRegistry::POST_PARTITIONING) { - // For any pre-partitioning phase, a graph is stored in options.graph. - process_graph(options.graph); - } else { - // For post partitioning phase, graphs are stored in - // options.partition_graphs. 
- for (auto& pg : *options.partition_graphs) { - process_graph(&pg.second); - } - } - - return Status::OK(); -} - -#else // INTEL_MKL_ML_ONLY - // This pass implements rewriting of graph to support following scenarios: // (A) Merging nodes in the graph // (B) Rewriting a node in the graph to a new node @@ -4539,7 +2364,7 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) { return Status::OK(); } -#endif // INTEL_MKL_ML_ONLY + } // namespace tensorflow #endif diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 77640e287c..0eda8170f8 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -37,1869 +37,6 @@ limitations under the License. namespace tensorflow { -#ifdef INTEL_MKL_ML_ONLY - -namespace { - -const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0"; -const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0"; - -static void InitGraph(const string& s, Graph* graph, - const string& device = kCPUDevice) { - GraphDef graph_def; - - auto parser = protobuf::TextFormat::Parser(); - // parser.AllowRelaxedWhitespace(true); - CHECK(parser.MergeFromString(s, &graph_def)) << s; - GraphConstructorOptions opts; - TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph)); - - for (Node* node : graph->nodes()) { - node->set_assigned_device_name(device); - } -} - -class MklLayoutPassTest : public ::testing::Test { - public: - MklLayoutPassTest() : graph_(OpRegistry::Global()) {} - - void InitGraph(const string& s, const string& device = kCPUDevice) { - ::tensorflow::InitGraph(s, &graph_, device); - original_ = CanonicalGraphString(&graph_); - } - - static bool IncludeNode(const Node* n) { return n->IsOp(); } - - static string EdgeId(const Node* n, int index) { - if (index == 0) { - return n->name(); - } else if (index == Graph::kControlSlot) { - return strings::StrCat(n->name(), ":control"); - } else { - return 
strings::StrCat(n->name(), ":", index); - } - } - - string CanonicalGraphString(Graph* g) { - std::vector nodes; - std::vector edges; - for (const Node* n : g->nodes()) { - if (IncludeNode(n)) { - nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")")); - } - } - for (const Edge* e : g->edges()) { - if (IncludeNode(e->src()) && IncludeNode(e->dst())) { - edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->", - EdgeId(e->dst(), e->dst_input()))); - } - } - // Canonicalize - std::sort(nodes.begin(), nodes.end()); - std::sort(edges.begin(), edges.end()); - return strings::StrCat(str_util::Join(nodes, ";"), "|", - str_util::Join(edges, ";")); - } - - string DoMklLayoutOptimizationPass() { - string before = CanonicalGraphString(&graph_); - LOG(ERROR) << "Before MKL layout rewrite pass: " << before; - - std::unique_ptr* ug = new std::unique_ptr(&graph_); - RunMklLayoutRewritePass(ug); - - string result = CanonicalGraphString(&graph_); - LOG(ERROR) << "After MKL layout rewrite pass: " << result; - return result; - } - - const string& OriginalGraph() const { return original_; } - - Graph graph_; - string original_; -}; - -REGISTER_OP("Input").Output("o: float").SetIsStateful(); -REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful(); -REGISTER_OP("HalfInput").Output("o: half").SetIsStateful(); -REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful(); -REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful(); -REGISTER_OP("_MklInput2") - .Output("o: uint8") - .Output("o1: uint8") - .SetIsStateful(); - -///////////////////////////////////////////////////////////////////// -// Unit tests related to node merge optiimization -///////////////////////////////////////////////////////////////////// - -TEST_F(MklLayoutPassTest, Basic) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }" - 
"node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Zeta);D(Zeta)|" - "A->C;A->D;B->C:1;B->D:1"); -} - -// Test set 1: Conv2D + AddBias - -// C=_MklConv2D(A,M,B,N); E=BiasAdd(C,D); Z=Zeta(E,Y) (for interleaved ordering) -// C=_MklConv2D(A,B,M,N); E=BiasAdd(C,D); Z=Zeta(E,Y) (for contiguous ordering) -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'M', 'N']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'BiasAdd'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['C', 'D'] }" - "node { name: 'Y' op: 'Input'}" - "node { name: 'Z' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['E', 'Y']}"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);D(Input);DMT/_0(Const);E(_MklConv2DWithBias);" - "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->E;" - "A:control->DMT/_0:control;B->E:1;D->E:2;DMT/_0->E:5;E->Z;M->E:3;" - "N->E:4;Y->Z:1"); -} - -// C=_MklConv2D(A,M:1,B,N:1); E=BiasAdd(C,D); Z=Zeta(E,Y) (for interleaved) -// C=_MklConv2D(A,B,M:1,N:1); E=BiasAdd(C,D); Z=Zeta(E,Y) (for contiguous) -// Test for correct output slots selected -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive1) { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - InitGraph( - "node { 
name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput2'}" - "node { name: 'N' op: '_MklInput2'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'M:1', 'N:1']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'BiasAdd'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['C', 'D'] }" - "node { name: 'Y' op: 'Input'}" - "node { name: 'Z' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['E', 'Y']}"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);D(Input);DMT/_0(Const);E(_MklConv2DWithBias);" - "M(_MklInput2);N(_MklInput2);Y(Input);Z(Zeta)|A->E;" - "A:control->DMT/_0:control;B->E:1;D->E:2;DMT/_0->E:5;E->Z;" - "M:1->E:3;N:1->E:4;Y->Z:1"); -} - -// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y); -// This is a case of node rewrite followed by node merge. -// We will first rewrite Conv2D to _MklConv2D, and then merge _MklConv2D -// with BiasAdd to produce _MklConv2DWithBias. 
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive2) { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'BiasAdd'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['C', 'D'] }" - "node { name: 'Y' op: 'Input'}" - "node { name: 'Z' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['E', 'Y']}"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|" - "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;" - "DMT/_2->E:5;E->Z;Y->Z:1"); -} - -// Graph contains only _MklConv2D, no AddBias. 
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'M', 'N']}"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);M(_MklInput);N(_MklInput)|" - "A->C;B->C:1;M->C:2;N->C:3"); -} - -// _MklConv2D output does not go to BiasAdd. -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'M', 'N']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Input'}" - "node { name: 'F' op: 'BiasAdd'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D', 'E'] }"); // Output of _MklConv2D does not go to BiasAdd. - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(Input);E(Input);F(BiasAdd);" - "M(_MklInput);N(_MklInput)|A->C;B->C:1;D->F;E->F:1;M->C:2;N->C:3"); -} - -// _MklConv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta). -// Merge should not be done in such case. 
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'M', 'N']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Input'}" - "node { name: 'F' op: 'BiasAdd'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D', 'E'] }" // Conv2D has two outputs. - // No merge should happen. - "node { name: 'G' op: 'Zeta'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(Input);E(Input);F(BiasAdd);" - "G(Zeta);M(_MklInput);N(_MklInput)|A->C;B->C:1;C->G;D->F;" - "E->F:1;E->G:1;M->C:2;N->C:3"); -} - -// data_format attribute value mismatch. Merge should not be done -// in such case. 
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'M', 'N']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'BiasAdd'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NHCW' } }" - " input: ['C', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);M(_MklInput);" - "N(_MklInput)|A->C;B->C:1;C->E;D->E:1;M->C:2;N->C:3"); -} - -// Test set 2: _MklConv2D..BiasAddGrad -> _MklConv2DWithBiasBackpropBias -// rewrite tests - -// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter -// and BackpropInput -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'O' op: '_MklInput'}" - "node { name: 'D' op: '_MklConv2DWithBias'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'C', 'M', 'N', 'O']}" - "node { name: 'E' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['D', 'A']}" - "node { name: 'F' op: 'Int32Input'}" - "node { name: 'G' op: '_MklConv2DBackpropFilter'" - " 
attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'F', 'E', 'M', 'N', 'O'] }" - "node { name: 'H' op: 'Int32Input'}" - "node { name: 'I' op: '_MklConv2DBackpropInput'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['H', 'B', 'E', 'M', 'N', 'O']}" - "node { name: 'J' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);DMT/_0(Const);" - "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);" - "I(_MklConv2DBackpropInput);J(_MklConv2DWithBiasBackpropBias);" - "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G;B->D:1;" - "B->I:1;C->D:2;D->E;DMT/_0->J:1;E->G:2;E->I:2;E->J;" - "E:control->DMT/_0:control;F->G:1;H->I;M->D:3;M->G:3;M->I:3;" - "N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5"); -} - -// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter -// and BackpropInput. But nodes do not match criteria for rewrite. So -// rewrite should not happen. 
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative1) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'O' op: '_MklInput'}" - "node { name: 'D' op: '_MklConv2DWithBias'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'C', 'M', 'N', 'O']}" - "node { name: 'E' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['D', 'A']}" - "node { name: 'F' op: 'Int32Input'}" - "node { name: 'G' op: '_MklConv2DBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['E', 'F', 'A', 'M', 'N', 'O'] }" - "node { name: 'H' op: 'Int32Input'}" - "node { name: 'I' op: '_MklConv2DBackpropInput'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['H', 'B', 'E', 'M', 'N', 'O']}" - "node { name: 'J' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);" - "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);" - "I(_MklConv2DBackpropInput);J(BiasAddGrad);" - 
"M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;" - "B->I:1;C->D:2;D->E;E->G;E->I:2;E->J;F->G:1;H->I;M->D:3;M->G:3;" - "M->I:3;N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5"); -} - -// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter -// and BackpropInput. But nodes do not match criteria for rewrite. So -// rewrite should not happen. -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative2) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'O' op: '_MklInput'}" - "node { name: 'D' op: '_MklConv2DWithBias'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['B', 'A', 'C', 'M', 'N', 'O']}" - "node { name: 'E' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['D', 'A']}" - "node { name: 'F' op: 'Int32Input'}" - "node { name: 'G' op: '_MklConv2DBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'F', 'E', 'M', 'N', 'O'] }" - "node { name: 'H' op: 'Int32Input'}" - "node { name: 'I' op: '_MklConv2DBackpropInput'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['H', 'B', 'E', 'M', 'N', 'O']}" - "node { name: 'J' op: 'BiasAddGrad'" - " attr { 
key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);" - "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);" - "I(_MklConv2DBackpropInput);J(BiasAddGrad);" - "M(_MklInput);N(_MklInput);O(_MklInput)|A->D:1;A->E:1;A->G;B->D;" - "B->I:1;C->D:2;D->E;E->G:2;E->I:2;E->J;F->G:1;H->I;M->D:3;M->G:3;" - "M->I:3;N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5"); -} - -// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'O' op: '_MklInput'}" - "node { name: 'D' op: '_MklConv2DWithBias'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'C', 'M', 'N', 'O']}" - "node { name: 'E' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['D', 'A']}" - "node { name: 'F' op: 'Int32Input'}" - "node { name: 'G' op: '_MklConv2DBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'F', 'E', 'M', 'N', 'O'] }" - "node { name: 'H' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - 
"A(Input);B(Input);C(Input);D(_MklConv2DWithBias);DMT/_0(Const);" - "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);" - "H(_MklConv2DWithBiasBackpropBias);M(_MklInput);N(_MklInput);" - "O(_MklInput)|A->D;A->E:1;A->G;B->D:1;C->D:2;D->E;DMT/_0->H:1;" - "E->G:2;E->H;E:control->DMT/_0:control;F->G:1;M->D:3;M->G:3;" - "N->D:4;N->G:4;O->D:5;O->G:5"); -} - -// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only -// But BackpropFilter node inputs do not satisfy criteria for rewrite. -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative1) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'O' op: '_MklInput'}" - "node { name: 'D' op: '_MklConv2DWithBias'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'C', 'M', 'N', 'O']}" - "node { name: 'E' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['D', 'A']}" - "node { name: 'F' op: 'Int32Input'}" - "node { name: 'G' op: '_MklConv2DBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['E', 'F', 'A', 'M', 'N', 'O'] }" - "node { name: 'H' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);" - 
"E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);" - "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;" - "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;" - "O->G:5"); -} - -// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only -// But BackpropFilter node inputs do not satisfy criteria for rewrite. -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative2) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'O' op: '_MklInput'}" - "node { name: 'D' op: '_MklConv2DWithBias'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['B', 'A', 'C', 'M', 'N', 'O']}" - "node { name: 'E' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['D', 'A']}" - "node { name: 'F' op: 'Int32Input'}" - "node { name: 'G' op: '_MklConv2DBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'F', 'E', 'M', 'N', 'O'] }" - "node { name: 'H' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);" - "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);" - "M(_MklInput);N(_MklInput);O(_MklInput)|A->D:1;A->E:1;A->G;B->D;" - 
"C->D:2;D->E;E->G:2;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;" - "O->G:5"); -} - -// No _MklConv2DWithBias in context, but _MklConv2D in context. -// No rewrite for BiasAddGrad should happen. -// C=_MklConv2D(A,M,B,N); D=Zeta(C,A); E=BiasAddGrad(D) (for interleaved) -// C=_MklConv2D(A,B,M,N); D=Zeta(C,A); E=BiasAddGrad(D) (for contiguous) -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Neg_NoMklConv2DWithBias) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'M', 'N']}" - "node { name: 'D' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A']}" - "node { name: 'E' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);" - "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;" - "M->C:2;N->C:3"); -} - -// No Conv2D in the context for BiasAddGrad. No rewrite should happen. 
-// C=Polygamma(A,B); D=Zeta(C,A); E=BiasAddGrad(D) -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Polygamma'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A']}" - "node { name: 'E' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|" - "A->C;A->D:1;B->C:1;C->D;D->E"); -} - -// No Conv2D in the context for BiasAddGrad, but MatMul in context. -// Rewrite should happen, but name of BiasAddGrad does not change. -// C=MatMul(A,B); D=Zeta(C,A); E=BiasAddGrad(D) -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D_MatMul) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'MatMul'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'transpose_a' value { b: false } }" - " attr { key: 'transpose_b' value { b: false } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A']}" - "node { name: 'E' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|" - "A->C;A->D:1;B->C:1;C->D;D->E"); -} - -// Test set 3: MatMul..BiasAddGrad -> BiasAddGrad rewrite tests -// C=MatMul(A,B); D=Zeta(C,A); E=BiasAddGrad(D) -TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'MatMul'" - " attr { key: 'T' 
value { type: DT_FLOAT } }" - " attr { key: 'transpose_a' value { b: false } }" - " attr { key: 'transpose_b' value { b: false } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A']}" - "node { name: 'E' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|" - "A->C;A->D:1;B->C:1;C->D;D->E"); -} - -// No MatMul in the context for BiasAddGrad. No rewrite should happen. -// C=Polygamma(A,B); D=Zeta(C,A); E=BiasAddGrad(D) -TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Negative_NoMatMul) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Polygamma'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A']}" - "node { name: 'E' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|" - "A->C;A->D:1;B->C:1;C->D;D->E"); -} - -///////////////////////////////////////////////////////////////////// -// Unit tests related to rewriting node to Mkl node -///////////////////////////////////////////////////////////////////// - -// Single Conv2D Op; No Mkl layer on the input and on the output. -// We will generate dummy Mkl tensor as 2nd input of Conv2D. 
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['B', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->C;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;" - "DMT/_1->C:3"); -} - -// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will -// have 2 outputs, both of which will be inputs to next Conv2D. -TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'C']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);" - 
"DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;" - "A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;" - "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2"); -} - -// Conv2D with INT32 which is not supported by Mkl -TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) { - InitGraph( - "node { name: 'A' op: 'HalfInput'}" - "node { name: 'B' op: 'HalfInput'}" - "node { name: 'C' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_HALF } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }" - " input: ['B', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|" - "A->C;B->C:1;B->D;C->D:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Conv2DBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'C']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);" - "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" - "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;" - "DMT/_1->D:4;DMT/_2->D:5"); -} - -TEST_F(MklLayoutPassTest, 
NodeRewrite_Conv2DGradInput_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Conv2DBackpropInput'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['B', 'A', 'C']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);" - "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" - "A->D:1;A->E;B->D;B:control->DMT/_0:control;" - "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;" - "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); -} - -// Concat Op test: Concat with no Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) { - InitGraph( - "node { name: 'A' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'B' op: 'InputList'" - " attr { key: 'N' value { i: 2 } }}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Concat'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['A', 'B:0', 'B:1']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D'] }"); - EXPECT_EQ( - DoMklLayoutOptimizationPass(), - "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);" - "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;" - "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); -} - -// Concat with 2 Mkl layers feeding it 
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'F' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['C', 'D']}" - "node { name: 'G' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'H' op: 'Concat'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['G', 'E', 'F']}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'H'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);" - "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;" - "A:control->DMT/_2:control;A:control->DMT/_3:control;" - "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;" - "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;" - "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;" - "G:control->DMT/_4:control;H->I:1"); -} - -// Concat with 1 Mkl and 1 non-Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) { - InitGraph( - "node { name: 'A' 
op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D']}" - "node { name: 'G' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'H' op: 'Concat'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['G', 'E', 'F']}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'H'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);" - "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" - "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;" - "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1"); -} - -// ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) { - InitGraph( - "node { name: 'A' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'B' op: 'InputList'" - " attr { key: 'N' value { i: 2 } }}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'ConcatV2'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 
'Tidx' value { type: DT_INT32 } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['B:0', 'B:1', 'A']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);" - "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;" - "B:control->DMT/_0:control;B:control->DMT/_1:control;" - "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;" - "DMT/_1->D:4;DMT/_2->D:5"); -} - -// ConcatV2 with 2 Mkl layers feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'F' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['C', 'D']}" - "node { name: 'G' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'H' op: 'ConcatV2'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'Tidx' value { type: DT_INT32 } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['E', 'F', 'G']}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'H'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - 
"A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);" - "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;" - "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;" - "C:control->DMT/_0:control;C:control->DMT/_1:control;" - "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;" - "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;" - "F:2->H:4;G->H:2;H->I:1"); -} - -// ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D']}" - "node { name: 'G' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'H' op: 'ConcatV2'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'Tidx' value { type: DT_INT32 } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['E', 'F', 'G']}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'H'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);" - "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" - 
"DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;" - "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;" - "G->H:2;H->I:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Relu'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;" - "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'ReluGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Relu'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A'] }" - "node { name: 'C' op: 'ReluGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;" - "DMT/_1->C:2"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 
'AvgPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;" - "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Int32Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'AvgPoolGrad' " - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['B', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->C;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;" - "DMT/_1->C:3"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'I' op: 'Int32Input'}" - "node { name: 'B' op: 'AvgPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['A'] }" - "node { name: 'C' op: 'AvgPoolGrad' " - " attr { key: 'T' value { type: DT_FLOAT 
} }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['I', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;" - "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;" - "I:control->DMT/_1:control"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Input'}" - "node { name: 'F' op: 'FusedBatchNormGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'epsilon' value { f: 0.0001 } }" - " attr { key: 'is_training' value { b: true } }" - " input: ['A', 'B', 'C', 'D', 'E'] }" - "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'F'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);" - "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;" - "A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;A:control->DMT/_3:control;" - "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;" - "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;" - "E->F:4;F->G:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 
'Input'}" - "node { name: 'F' op: 'FusedBatchNorm'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'epsilon' value { f: 0.0001 } }" - " attr { key: 'is_training' value { b: true } }" - " input: ['A', 'B', 'C', 'D', 'E'] }" - "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'F'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);" - "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;" - "A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;A:control->DMT/_3:control;" - "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;" - "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;" - "E->F:4;F->G:1"); -} - -///////////////////////////////////////////////////////////////////// -// Unit tests related to rewriting node for workspace edges -///////////////////////////////////////////////////////////////////// - -/* Test LRN->MaxPool->MaxPoolGrad->LRNGrad replacement by workspace nodes. 
*/ -TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'LRN'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['A'] }" - "node { name: 'C' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['B'] }" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'MaxPoolGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['B', 'C', 'D'] }" - "node { name: 'F' op: 'Input'}" - "node { name: 'G' op: 'LRNGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['E', 'F', 'B'] }" - "node { name: 'H' op: 'Input'}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['H', 'G'] }"); - EXPECT_EQ( - DoMklLayoutOptimizationPass(), - "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);" - "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;" - 
"B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;" - "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;" - "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I"); -} - -/* Test LRN->LRNGrad replacement by workspace nodes. */ -TEST_F(MklLayoutPassTest, LRN_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'LRN'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['A'] }" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'LRNGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['C', 'D', 'B'] }" - "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|" - "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;" - "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;" - "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1"); -} - -/* Test LRN->LRNGrad replacement when only one of them is present. 
*/ -TEST_F(MklLayoutPassTest, LRN_Negative1) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'LRN'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|" - "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); -} - -/* Test LRN->LRNGrad replacement when only one of them is present. */ -TEST_F(MklLayoutPassTest, LRN_Negative2) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'LRNGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['A', 'B', 'C'] }" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);" - "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|" - "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;A:control->DMT/_3:control;" - "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;" - "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6"); -} - -/* Test LRN->LRNGrad negative case, where single LRN feeds - 2 LRNGrad nodes at different slots. 
*/ -TEST_F(MklLayoutPassTest, LRN_Negative3) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'LRN'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['A'] }" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'LRNGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['C', 'D', 'B'] }" - "node { name: 'F' op: 'LRNGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.001 } }" - " attr { key: 'beta' value { f: 0.75 } }" - " attr { key: 'bias' value { f: 1.0 } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'depth_radius' value { i: 2 } }" - " input: ['C', 'B', 'D'] }" - "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['E', 'F'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);" - "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;" - "A:control->DMT/_0:control;B->E:2;" - "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;" - "C:control->DMT/_1:control;C:control->DMT/_2:control;" - "C:control->DMT/_3:control;C:control->DMT/_4:control;" - "C:control->DMT/_5:control;C:control->DMT/_6:control;" - "D->E:1;D->F:2;DMT/_0->B:1;DMT/_1->F:3;DMT/_2->F:7;DMT/_3->F:4;" - "DMT/_4->F:6;DMT/_5->E:4;DMT/_6->E:5;E->G;F->G:1"); -} - -/* Test MaxPool->MaxPoolGrad replacement by workspace+rewrite nodes. 
*/ -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'MaxPoolGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['C', 'B', 'D'] }" - "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'E'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);" - "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|" - "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;" - "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;" - "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1"); -} - -// Test MaxPool>MaxPoolGrad replacement when only one of them is present. -// In this case, we will rewrite MaxPool node but workspace edges will not -// be present. 
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|" - "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); -} - -// Test MaxPoolGrad replacement when only one of them is present. -// In this case, we will rewrite MaxPoolGrad and for workspace tensor and -// its Mkl part, we will generate dummy tensor. -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'MaxPoolGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:3, i:3} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:2, i:2} } }" - " input: ['A', 'B', 'C'] }" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);" - "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|" - "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;A:control->DMT/_3:control;" - "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;" - "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6"); -} - -// Test MaxPool handling for batch-wise 
pooling (NCHW) -// No rewrite should take place in such case -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 2, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -// Test MaxPool handling for batch-wise pooling (NCHW) -// No rewrite should take place in such case -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 2, i:1, i:1, i:1} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -// Test MaxPool handling for depth-wise pooling (NHWC) -// No rewrite should take place in such case -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:2, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: 
['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -// Test MaxPool handling for depth-wise pooling (NCHW) -// No rewrite should take place in such case -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:2, i:1, i:1} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -// Test MaxPool handling for batch-wise pooling (NHWC) -// No rewrite should take place in such case -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NHWC' } }" - " attr { key: 'ksize' value { list: {i: 2, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -// Test MaxPool handling for batch-wise pooling (NHWC) -// No rewrite should take place in such case -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT 
} }" - " attr { key: 'data_format' value { s: 'NHWC' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 2, i:1, i:1, i:1} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -// Test MaxPool handling for depth-wise pooling (NHWC) -// No rewrite should take place in such case -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NHWC' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:1, i:2} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -// Test MaxPool handling for depth-wise pooling (NHWC) -// No rewrite should take place in such case -TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NHWC' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:2} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - 
-///////////////////////////////////////////////////////////////////// - -// Single Conv2D Op on GPU device -// No rewrite should happen -TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['B', 'C'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1"); -} - -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'O' op: '_MklInput'}" - "node { name: 'D' op: '_MklConv2DWithBias'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'C', 'M', 'N', 'O']}" - "node { name: 'E' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['D', 'A']}" - "node { name: 'F' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['E'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);" - "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);" - "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;" - 
"M->D:3;N->D:4;O->D:5"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Conv2DBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'C']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|" - "A->D;A->E;B->D:1;C->D:2;D->E:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Relu'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'ReluGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'MaxPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NHWC' } }" - " attr { 
key: 'ksize' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'AvgPool'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NHWC' } }" - " attr { key: 'ksize' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1"); -} - -// Concat Op test: Concat with no Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'B' op: 'InputList'" - " attr { key: 'N' value { i: 2 } }}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Concat'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['A', 'B:0', 'B:1']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;" - "B->D:1;B:1->D:2;C->E;D->E:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) { - InitGraph( 
- "node { name: 'A' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'B' op: 'InputList'" - " attr { key: 'N' value { i: 2 } }}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'ConcatV2'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'Tidx' value { type: DT_INT32 } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['B:0', 'B:1', 'A']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|" - "A->D:2;B->D;B:1->D:1;C->E;D->E:1"); -} - -TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Input'}" - "node { name: 'F' op: 'FusedBatchNorm'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'epsilon' value { f: 0.0001 } }" - " attr { key: 'is_training' value { b: true } }" - " input: ['A', 'B', 'C', 'D', 'E'] }" - "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'F'] }", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);E(Input);" - "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;" - "E->F:4;F->G:1"); -} - -TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " 
attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " input: ['A', 'B', 'M', 'N']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'BiasAdd'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['C', 'D'] }" - "node { name: 'Y' op: 'Input'}" - "node { name: 'Z' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['E', 'Y']}", - kGPUDevice); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);" - "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;" - "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1"); -} - -///////////////////////////////////////////////////////////////////// - -static void BM_MklLayoutRewritePass(int iters, int op_nodes) { - testing::StopTiming(); - string s; - for (int in = 0; in < 10; in++) { - s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in); - } - random::PhiloxRandom philox(301, 17); - random::SimplePhilox rnd(&philox); - for (int op = 0; op < op_nodes; op++) { - s += strings::Printf( - "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { " - "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }", - op, rnd.Uniform(10), rnd.Uniform(10)); - } - - bool first = true; - while (iters > 0) { - Graph* graph = new Graph(OpRegistry::Global()); - InitGraph(s, graph); - int N = graph->num_node_ids(); - if (first) { - testing::SetLabel(strings::StrCat("Per graph node. 
Nodes: ", N)); - first = false; - } - { - testing::StartTiming(); - std::unique_ptr ug(graph); - RunMklLayoutRewritePass(&ug); - testing::StopTiming(); - } - iters -= N; // Our benchmark units are individual graph nodes, - // not whole graphs - // delete graph; - } -} -BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000); - -} // namespace - -#else // INTEL_MKL_ML_ONLY - // NOTE: Unit tests in this file rely on a topological sorted graph for // printing. But since sibling nodes of a node in the topologically sorted graph // can be printed in different orders, tests may fail if the order in which @@ -3602,8 +1739,6 @@ BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000); } // namespace -#endif // INTEL_MKL_ML_ONLY - } // namespace tensorflow #endif // INTEL_MKL && ENABLE_MKL -- GitLab From 6123677f264c615042a816e713f7f1204685e544 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Fri, 5 Oct 2018 14:18:41 -0700 Subject: [PATCH 026/411] Fix bug in nonpip builds in ci_parameterized_build.sh The extra spaces were confusing bash's string-line-continuation from the backslash `\` on the previous line. PiperOrigin-RevId: 215964853 --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index fdff867ff0..489722c0e9 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -423,7 +423,7 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] || [[ ${CTYPE} == "debian.jessie.cpu" ]]; then # CPU only command, fully parallel. NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} "\ - "${EXTRA_ARGS} -- ${BAZEL_TARGET}" +"${EXTRA_ARGS} -- ${BAZEL_TARGET}" elif [[ ${CTYPE} == gpu* ]]; then # GPU only command, run as many jobs as the GPU count only. 
NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\ -- GitLab From c221f04b7efff5929f3a6d090983b52f3aa16166 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Fri, 5 Oct 2018 14:44:47 -0700 Subject: [PATCH 027/411] Automated rollback of commit ae0bc6f006497cc04a2ee75166d4ec71c7154fd8 PiperOrigin-RevId: 215969360 --- tensorflow/core/kernels/data/BUILD | 14 -- tensorflow/core/kernels/data/dataset_utils.cc | 47 ----- tensorflow/core/kernels/data/dataset_utils.h | 20 -- .../core/kernels/data/dataset_utils_test.cc | 46 ----- .../core/kernels/data/filter_dataset_op.cc | 162 +++++++++------- .../kernels/data/map_and_batch_dataset_op.cc | 180 +++++++----------- .../core/kernels/data/map_dataset_op.cc | 56 ++---- .../kernels/data/parallel_map_dataset_op.cc | 73 +++---- .../kernels/data/parallel_map_iterator.cc | 17 +- .../core/kernels/data/parallel_map_iterator.h | 2 +- .../kernels/data/parse_example_dataset_op.cc | 2 +- .../kernel_tests/map_and_batch_test.py | 20 -- .../kernel_tests/filter_dataset_op_test.py | 2 +- .../data/kernel_tests/map_dataset_op_test.py | 80 ++------ .../python/data/kernel_tests/test_base.py | 29 --- 15 files changed, 230 insertions(+), 520 deletions(-) delete mode 100644 tensorflow/core/kernels/data/dataset_utils_test.cc diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 37c1c54786..451f8c1a6c 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -45,16 +45,6 @@ cc_library( ], ) -tf_cc_test( - name = "dataset_utils_test", - srcs = ["dataset_utils_test.cc"], - deps = [ - ":dataset_utils", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], -) - cc_library( name = "captured_function", srcs = ["captured_function.cc"], @@ -215,7 +205,6 @@ tf_kernel_library( deps = [ ":captured_function", ":dataset", - ":dataset_utils", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", @@ -243,7 +232,6 @@ tf_kernel_library( deps = 
[ ":captured_function", ":dataset", - ":dataset_utils", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", @@ -257,7 +245,6 @@ tf_kernel_library( deps = [ ":captured_function", ":dataset", - ":dataset_utils", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", @@ -298,7 +285,6 @@ tf_kernel_library( deps = [ ":captured_function", ":dataset", - ":dataset_utils", ":parallel_map_iterator", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc index a40f7f2146..e10833f525 100644 --- a/tensorflow/core/kernels/data/dataset_utils.cc +++ b/tensorflow/core/kernels/data/dataset_utils.cc @@ -15,57 +15,10 @@ limitations under the License. #include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/common_runtime/device.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/lib/gtl/cleanup.h" namespace tensorflow { namespace data { -Status ComputeShortCircuitIndices(OpKernelContext* ctx, - const NameAttrList& func, - std::vector* indices) { - FunctionLibraryRuntime::Handle fn_handle; - TF_RETURN_IF_ERROR(ctx->function_library()->Instantiate( - func.name(), AttrSlice(&func.attr()), &fn_handle)); - auto cleanup = gtl::MakeCleanup([ctx, fn_handle]() { - Status s = ctx->function_library()->ReleaseHandle(fn_handle); - if (!s.ok()) { - LOG(WARNING) << "Failed to release handle: " << s.error_message(); - } - }); - - const FunctionBody* fn_body = - ctx->function_library()->GetFunctionBody(fn_handle); - indices->resize(fn_body->ret_nodes.size()); - for (size_t i = 0; i < fn_body->ret_nodes.size(); ++i) { - Node* ret_node = fn_body->ret_nodes[i]; - Node* ret_input_node; - TF_RETURN_IF_ERROR(ret_node->input_node(0, &ret_input_node)); - if 
(ret_input_node->def().op() == FunctionLibraryDefinition::kArgOp) { - TF_RETURN_IF_ERROR( - GetNodeAttr(ret_input_node->def(), "index", &((*indices)[i]))); - } else { - indices->clear(); - break; - } - } - return Status::OK(); -} - -std::vector ComputeMoveVector(const std::vector& indices) { - std::map last_use; - for (size_t i = 0; i < indices.size(); ++i) { - last_use[indices[i]] = i; - } - std::vector can_move; - can_move.resize(indices.size()); - for (size_t i = 0; i < indices.size(); ++i) { - can_move[i] = last_use[indices[i]] == i; - } - return can_move; -} - Status MakeIteratorFromInputElement( IteratorContext* ctx, const std::vector& input_element, int64 thread_index, CapturedFunction* captured_func, StringPiece prefix, diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index d777062293..6ec1350cd4 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -22,26 +22,6 @@ limitations under the License. namespace tensorflow { namespace data { -// This method is used to determine whether we can short-circuit the evaluation -// of the user-defined function `func`. Short-circuting is possible if every -// function output corresponds to one of its inputs (e.g. `f(x) = x`, `f(x,y) = -// (y,x)`, or `f(x) = (x,x)`). -// -// If short-circuiting is possible, the method stores the mapping from output -// indices to input indices in `indices`. Otherwise, `indices` will be empty. -// -// Returns non-ok status if analysis of the function fails. -// -// TODO(jsimsa): Extend this to support constants as well. -Status ComputeShortCircuitIndices(OpKernelContext* ctx, - const NameAttrList& func, - std::vector* indices); - -// Given a vector that maps output indices to input indices, return a vector -// that identifies for which output indices can we move the input (assuming -// output indices are processed left to right). 
-std::vector ComputeMoveVector(const std::vector& indices); - Status MakeIteratorFromInputElement( IteratorContext* ctx, const std::vector& input_element, int64 thread_index, CapturedFunction* captured_func, StringPiece prefix, diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc deleted file mode 100644 index 43295b8ebb..0000000000 --- a/tensorflow/core/kernels/data/dataset_utils_test.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/kernels/data/dataset_utils.h" - -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace data { -namespace { - -TEST(DatasetUtils, ComputeMoveVector) { - struct TestCase { - std::vector indices; - std::vector expected; - }; - - TestCase test_cases[] = { - TestCase{{}, {}}, - TestCase{{1}, {true}}, - TestCase{{1, 1}, {false, true}}, - TestCase{{1, 2}, {true, true}}, - TestCase{{1, 1, 2}, {false, true, true}}, - TestCase{{1, 2, 2}, {true, false, true}}, - }; - - for (auto& test_case : test_cases) { - EXPECT_EQ(test_case.expected, ComputeMoveVector(test_case.indices)); - } -} - -} // namespace -} // namespace data -} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc index be7d182a1f..00884314a9 100644 --- a/tensorflow/core/kernels/data/filter_dataset_op.cc +++ b/tensorflow/core/kernels/data/filter_dataset_op.cc @@ -18,11 +18,9 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" -#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -33,84 +31,67 @@ namespace { class FilterDatasetOp : public UnaryDatasetOpKernel { public: - using FilterIteratorPredicate = - std::function, bool*)>; - explicit FilterDatasetOp(OpKernelConstruction* ctx) - : UnaryDatasetOpKernel(ctx) { + : UnaryDatasetOpKernel(ctx), + graph_def_version_(ctx->graph_def_version()) { OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_)); } void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { + FunctionLibraryRuntime::Handle pred_handle; + OP_REQUIRES_OK(ctx, + ctx->function_library()->Instantiate( + func_.name(), AttrSlice(&func_.attr()), &pred_handle)); + auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() { + OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle)); + }); + + const FunctionBody* pred_body = + ctx->function_library()->GetFunctionBody(pred_handle); + OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1, + errors::InvalidArgument( + "predicate function must have a single return value.")); + Node* ret_node = pred_body->ret_nodes[0]; + Node* ret_input_node; + OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node)); + std::unique_ptr captured_func; OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", &captured_func)); - std::vector indices; - OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices)); - OP_REQUIRES(ctx, indices.size() <= 1, - errors::InvalidArgument( - "predicate function has more than one return value.")); - - FilterIteratorPredicate filter_pred; - if (indices.empty()) { - 
CapturedFunction* raw_captured_func = captured_func.get(); - filter_pred = [raw_captured_func](IteratorContext* ctx, - const std::vector& args, - bool* out_matched) { - std::vector result; - TF_RETURN_IF_ERROR( - raw_captured_func->RunWithBorrowedArgs(ctx, args, &result)); - - if (result.size() != 1 || result[0].dtype() != DT_BOOL || - result[0].NumElements() != 1) { - return errors::InvalidArgument( - "Filter predicate `f` must return a scalar bool."); - } - *out_matched = result[0].scalar()(); - return Status::OK(); - }; + if (ret_input_node->def().op() == "_Arg") { + int32 index = -1; + OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index)); + *output = new FilterTensorDataset(ctx, input, func_, + std::move(captured_func), index); } else { - filter_pred = [indices](IteratorContext* ctx, - const std::vector& args, - bool* out_matched) { - const Tensor& predicate = args[indices[0]]; - if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) { - return errors::InvalidArgument( - "Filter predicate `f` must return a scalar bool."); - } - *out_matched = predicate.scalar()(); - return Status::OK(); - }; + *output = new FilterFunctionDataset(ctx, input, func_, + std::move(captured_func)); } - - *output = new Dataset(ctx, input, func_, std::move(captured_func), - std::move(filter_pred)); } private: - class Dataset : public DatasetBase { + const int graph_def_version_; + + class FilterDatasetBase : public DatasetBase { public: - Dataset(OpKernelContext* ctx, const DatasetBase* input, - const NameAttrList& func, - std::unique_ptr captured_func, - FilterIteratorPredicate filter_pred) + FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, + std::unique_ptr captured_func) : DatasetBase(DatasetContext(ctx)), input_(input), func_(func), - captured_func_(std::move(captured_func)), - filter_pred_(std::move(filter_pred)) { + captured_func_(std::move(captured_func)) { input_->Ref(); } - ~Dataset() override { 
input_->Unref(); } + ~FilterDatasetBase() override { input_->Unref(); } std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - return MakeUnique( - Iterator::Params{this, strings::StrCat(prefix, "::Filter")}, - filter_pred_); + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::Filter")})); } const DataTypeVector& output_dtypes() const override { @@ -152,15 +133,17 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + virtual Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const = 0; + private: - class Iterator : public DatasetIterator { + class Iterator : public DatasetIterator { public: - explicit Iterator(const Params& params, - FilterIteratorPredicate filter_pred) - : DatasetIterator(params), + explicit Iterator(const Params& params) + : DatasetIterator(params), filtered_elements_(0), - dropped_elements_(0), - filter_pred_(std::move(filter_pred)) { + dropped_elements_(0) { std::vector components = str_util::Split(params.prefix, "::", str_util::SkipEmpty()); prefix_end_ = components.back(); @@ -197,7 +180,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - TF_RETURN_IF_ERROR(filter_pred_(ctx, *out_tensors, &matched)); + TF_RETURN_IF_ERROR( + dataset()->EvaluatePredicate(ctx, *out_tensors, &matched)); if (!matched) { // Clear the output tensor list since it didn't match. 
out_tensors->clear(); @@ -267,14 +251,64 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr input_impl_ GUARDED_BY(mu_); int64 filtered_elements_ GUARDED_BY(mu_); int64 dropped_elements_ GUARDED_BY(mu_); - const FilterIteratorPredicate filter_pred_; string prefix_end_; }; const DatasetBase* const input_; const NameAttrList func_; + + protected: const std::unique_ptr captured_func_; - const FilterIteratorPredicate filter_pred_; + }; + + class FilterFunctionDataset : public FilterDatasetBase { + public: + using FilterDatasetBase::FilterDatasetBase; + + protected: + Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const override { + // TODO(mrry): Avoid blocking a threadpool thread. We will need to + // stack-rip the iterators and use async kernels. + std::vector result; + TF_RETURN_IF_ERROR( + captured_func_->RunWithBorrowedArgs(ctx, element, &result)); + + if (result.size() != 1 || result[0].dtype() != DT_BOOL || + result[0].NumElements() != 1) { + return errors::InvalidArgument( + "Filter predicate `f` must return a scalar bool."); + } + *out_matched = result[0].scalar()(); + return Status::OK(); + } + }; + + class FilterTensorDataset : public FilterDatasetBase { + public: + FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, + std::unique_ptr captured_func, + int32 index) + : FilterDatasetBase(ctx, input, func, std::move(captured_func)), + index_(index) {} + + protected: + Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const override { + const Tensor& predicate = element[index_]; + if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) { + return errors::InvalidArgument( + "Filter predicate `f` must return a scalar bool."); + } + *out_matched = predicate.scalar()(); + return Status::OK(); + } + + private: + const int32 index_; }; private: diff --git 
a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index f9aaa3080e..bf08970560 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" -#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/inplace_ops_functor.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/gtl/cleanup.h" @@ -30,7 +29,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/tracing.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -43,10 +41,6 @@ namespace { // transformation more robust. class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { public: - using MapAndBatchIteratorFunction = - std::function, - std::shared_ptr>, StatusCallback)>; - explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx), op_version_(ctx->def().op() == "MapAndBatchDataset" ? 
1 : 2) { @@ -97,66 +91,31 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", &captured_func)); - std::vector indices; - OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices)); - - MapAndBatchIteratorFunction map_func; - if (indices.empty()) { - CapturedFunction* raw_captured_func = captured_func.get(); - map_func = [raw_captured_func]( - IteratorContext* ctx, const string& prefix, - std::vector args, - std::shared_ptr> out_tensors, - StatusCallback done) { - raw_captured_func->RunAsync(ctx, std::move(args), out_tensors.get(), - std::move(done), prefix); - }; - } else { - std::vector can_move = ComputeMoveVector(indices); - map_func = [indices, can_move]( - IteratorContext* ctx, const string& prefix, - std::vector args, - std::shared_ptr> out_tensors, - StatusCallback done) { - for (size_t i = 0; i < indices.size(); ++i) { - if (can_move[i]) { - out_tensors->push_back(std::move(args[indices[i]])); - } else { - out_tensors->push_back(args[indices[i]]); - } - } - done(Status::OK()); - }; - } - - *output = new Dataset(ctx, input, func_, batch_size, num_parallel_calls, - drop_remainder, output_types_, output_shapes_, - std::move(captured_func), &ctx->eigen_cpu_device(), - std::move(map_func)); + *output = new Dataset(ctx, input, batch_size, num_parallel_calls, + drop_remainder, output_types_, output_shapes_, func_, + std::move(captured_func), &ctx->eigen_cpu_device()); } private: class Dataset : public DatasetBase { public: - Dataset(OpKernelContext* ctx, const DatasetBase* input, - const NameAttrList& func, int64 batch_size, + Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size, int64 num_parallel_calls, bool drop_remainder, const DataTypeVector& output_types, const std::vector& output_shapes, + const NameAttrList& func, std::unique_ptr captured_func, - const Eigen::ThreadPoolDevice* device, - MapAndBatchIteratorFunction map_func) + const 
Eigen::ThreadPoolDevice* device) : DatasetBase(DatasetContext(ctx)), input_(input), - func_(func), batch_size_(batch_size), num_parallel_calls_(num_parallel_calls), drop_remainder_(drop_remainder), output_types_(output_types), output_shapes_(output_shapes), + map_fn_(func), captured_func_(std::move(captured_func)), - device_(device), - map_func_(std::move(map_func)) { + device_(device) { input_->Ref(); } @@ -164,9 +123,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - return MakeUnique( - Iterator::Params{this, strings::StrCat(prefix, "::MapAndBatch")}, - map_func_); + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::MapAndBatch")})); } const DataTypeVector& output_dtypes() const override { @@ -185,7 +143,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { Status AsGraphDefInternal(SerializationContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { - TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name())); + TF_RETURN_IF_ERROR(b->AddFunction(ctx, map_fn_.name())); Node* input_graph_node = nullptr; TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); Node* batch_size_node; @@ -207,7 +165,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { other_arguments_types.emplace_back(t.dtype()); } AttrValue f; - b->BuildAttrValue(func_, &f); + b->BuildAttrValue(map_fn_, &f); AttrValue other_arguments_types_attr; b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); @@ -227,14 +185,12 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { private: class Iterator : public DatasetIterator { public: - explicit Iterator(const Params& params, - MapAndBatchIteratorFunction map_func) + explicit Iterator(const Params& params) : DatasetIterator(params), mu_(std::make_shared()), cond_var_(std::make_shared()), num_parallel_calls_(std::make_shared( - params.dataset->num_parallel_calls_, mu_, 
cond_var_)), - map_func_(std::move(map_func)) {} + params.dataset->num_parallel_calls_, mu_, cond_var_)) {} ~Iterator() override { mutex_lock l(*mu_); @@ -341,6 +297,44 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { int64 num_calls; // access guarded by owner's mutex }; + void Callback(const std::shared_ptr& ctx, + const std::shared_ptr& result, + const std::shared_ptr>& return_values, + int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) { + result->UpdateStatus(status); + if (status.ok()) { + EnsureOutputAllocated(ctx, result, return_values); + for (size_t i = 0; i < return_values->size(); ++i) { + const Tensor& tensor = return_values->at(i); + Tensor* batch = &(result->output)[i]; + if (tensor.NumElements() != + (batch->NumElements() / batch->dim_size(0))) { + TensorShape batch_shape = batch->shape(); + batch_shape.RemoveDim(0); + result->UpdateStatus(errors::InvalidArgument( + "Cannot add tensor to the batch: number of elements does not " + "match. Shapes are: [tensor]: ", + tensor.shape().DebugString(), + ", [batch]: ", batch_shape.DebugString())); + break; + } + // TODO(mrry): Add a version of DoParallelConcat that allows us to + // move `tensor` where possible, to speed up string tensor batching. 
+ Status copy_status = ::tensorflow::functor::DoParallelConcat( + *dataset()->device_, tensor, offset, batch); + if (!copy_status.ok()) { + result->UpdateStatus(copy_status); + break; + } + } + { + mutex_lock l(result->mu); + result->num_elements++; + } + } + CallCompleted(result); + } + void CallCompleted(const std::shared_ptr& result) LOCKS_EXCLUDED(*mu_) { mutex_lock l(*mu_); @@ -369,48 +363,21 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { return; } - std::shared_ptr> return_values = - std::make_shared>(); - auto done = [this, ctx, result, return_values, offset](Status status) { - result->UpdateStatus(status); - if (status.ok()) { - EnsureOutputAllocated(ctx, result, return_values); - for (size_t i = 0; i < return_values->size(); ++i) { - const Tensor& tensor = return_values->at(i); - Tensor* batch = &(result->output)[i]; - if (tensor.NumElements() != - (batch->NumElements() / batch->dim_size(0))) { - TensorShape batch_shape = batch->shape(); - batch_shape.RemoveDim(0); - result->UpdateStatus(errors::InvalidArgument( - "Cannot add tensor to the batch: number of elements does " - "not match. Shapes are: [tensor]: ", - tensor.shape().DebugString(), - ", [batch]: ", batch_shape.DebugString())); - break; - } - // TODO(mrry): Add a version of DoParallelConcat that allows us to - // move `tensor` where possible, to speed up string tensor - // batching. - Status copy_status = ::tensorflow::functor::DoParallelConcat( - *dataset()->device_, tensor, offset, batch); - if (!copy_status.ok()) { - result->UpdateStatus(copy_status); - break; - } - } - { - mutex_lock l(result->mu); - result->num_elements++; - } - } - CallCompleted(result); - }; - - // Apply the map function on `input_element`, storing the result in - // `return_values`, and invoking `done` when finished. 
- map_func_(ctx.get(), prefix(), std::move(input_element), - std::move(return_values), std::move(done)); + // Call `captured_func_(input_element)`, using `Callback` to store the + // result in `result`. + (*ctx->runner())(std::bind( + [this, result, offset](std::shared_ptr ctx, + std::vector input_element) { + std::shared_ptr> return_values( + new std::vector()); + dataset()->captured_func_->RunAsync( + ctx.get(), std::move(input_element), return_values.get(), + [this, ctx, result, return_values, offset](Status status) { + Callback(ctx, result, return_values, offset, status); + }, + prefix()); + }, + ctx, std::move(input_element))); } Status CopyPartialBatch(Tensor* output, const Tensor& value, @@ -437,7 +404,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { void EnsureRunnerThreadStarted(IteratorContext* ctx) EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { - auto ctx_copy = std::make_shared(*ctx); + std::shared_ptr ctx_copy(new IteratorContext(*ctx)); runner_thread_.reset(ctx->env()->StartThread( {}, "runner_thread", std::bind(&Iterator::RunnerThread, this, ctx_copy))); @@ -542,8 +509,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { while (!busy()) { if (call_counter_ % dataset()->batch_size_ == 0) { - batch_results_.push_back( - std::make_shared(dataset()->batch_size_)); + batch_results_.emplace_back( + new BatchResult(dataset()->batch_size_)); } int64 offset = call_counter_++ % dataset()->batch_size_; new_calls.emplace_back(batch_results_.back(), offset); @@ -560,8 +527,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader, size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) { - batch_results_.push_back( - std::make_shared(dataset()->batch_size_)); + batch_results_.emplace_back(new BatchResult(dataset()->batch_size_)); std::shared_ptr result = batch_results_.back(); string prefix = strings::StrCat("batch_results_", index); mutex_lock l(result->mu); @@ 
-687,8 +653,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { const std::shared_ptr cond_var_; // Identifies the maximum number of parallel calls. const std::shared_ptr num_parallel_calls_; - const MapAndBatchIteratorFunction map_func_; - // Counts the number of outstanding calls for this batch. int64 num_calls_ GUARDED_BY(*mu_) = 0; // Counts the total number of calls. @@ -707,9 +671,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { const bool drop_remainder_; const DataTypeVector output_types_; const std::vector output_shapes_; + const NameAttrList map_fn_; const std::unique_ptr captured_func_; const Eigen::ThreadPoolDevice* device_; // not owned - const MapAndBatchIteratorFunction map_func_; }; const int op_version_; diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc index 0abb2eb4f3..f112e1dc43 100644 --- a/tensorflow/core/kernels/data/map_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_dataset_op.cc @@ -17,9 +17,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" -#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/lib/random/random.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -30,9 +28,6 @@ namespace { class MapDatasetOp : public UnaryDatasetOpKernel { public: - using MapIteratorFunction = std::function, std::vector*)>; - explicit MapDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); @@ -48,36 +43,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel { use_inter_op_parallelism_, &captured_func)); - std::vector indices; - OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices)); - - MapIteratorFunction map_func; - if (indices.empty()) { - CapturedFunction* raw_captured_func = captured_func.get(); - map_func = [raw_captured_func](IteratorContext* ctx, - std::vector args, - std::vector* out_tensors) { - return raw_captured_func->Run(ctx, std::move(args), out_tensors); - }; - } else { - std::vector can_move = ComputeMoveVector(indices); - map_func = [indices, can_move](IteratorContext* ctx, - std::vector args, - std::vector* out_tensors) { - std::map counts; - for (size_t i = 0; i < indices.size(); ++i) { - if (can_move[i]) { - out_tensors->push_back(std::move(args[indices[i]])); - } else { - out_tensors->push_back(args[indices[i]]); - } - } - return Status::OK(); - }; - } - *output = new Dataset(ctx, input, func_, std::move(captured_func), - output_types_, output_shapes_, std::move(map_func)); + output_types_, output_shapes_); } private: @@ -87,15 +54,13 @@ class MapDatasetOp : public UnaryDatasetOpKernel { const NameAttrList& func, std::unique_ptr captured_func, const DataTypeVector& output_types, - const std::vector& output_shapes, - MapIteratorFunction 
map_func) + const std::vector& output_shapes) : DatasetBase(DatasetContext(ctx)), input_(input), func_(func), captured_func_(std::move(captured_func)), output_types_(output_types), - output_shapes_(output_shapes), - map_func_(std::move(map_func)) { + output_shapes_(output_shapes) { input_->Ref(); } @@ -103,8 +68,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - return MakeUnique( - Iterator::Params{this, strings::StrCat(prefix, "::Map")}, map_func_); + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::Map")})); } const DataTypeVector& output_dtypes() const override { @@ -151,8 +116,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel { private: class Iterator : public DatasetIterator { public: - explicit Iterator(const Params& params, MapIteratorFunction map_func) - : DatasetIterator(params), map_func_(std::move(map_func)) {} + explicit Iterator(const Params& params) + : DatasetIterator(params) {} Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( @@ -174,7 +139,10 @@ class MapDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - Status s = map_func_(ctx, args, out_tensors); + // TODO(mrry): Avoid blocking a threadpool thread. We will need to + // stack-rip the iterators and use async kernels. + Status s = + dataset()->captured_func_->Run(ctx, std::move(args), out_tensors); if (errors::IsOutOfRange(s)) { // `f` may deliberately raise `errors::OutOfRange` to indicate // that we should terminate the iteration early. 
@@ -199,7 +167,6 @@ class MapDatasetOp : public UnaryDatasetOpKernel { private: std::unique_ptr input_impl_; - const MapIteratorFunction map_func_; }; const DatasetBase* const input_; @@ -207,7 +174,6 @@ class MapDatasetOp : public UnaryDatasetOpKernel { const std::unique_ptr captured_func_; const DataTypeVector output_types_; const std::vector output_shapes_; - const MapIteratorFunction map_func_; }; DataTypeVector output_types_; diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index a34bb172d4..6abe6c8338 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -19,7 +19,6 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" -#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/parallel_map_iterator.h" #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/lib/random/random.h" @@ -57,49 +56,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { use_inter_op_parallelism_, &captured_func)); - std::vector indices; - OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices)); - - ParallelMapIteratorFunction map_func; - if (indices.empty()) { - CapturedFunction* raw_captured_func = captured_func.get(); - map_func = [raw_captured_func](IteratorContext* ctx, const string& prefix, - std::vector args, - std::vector* out_tensors, - StatusCallback done) { - raw_captured_func->RunAsync(ctx, std::move(args), out_tensors, - std::move(done), prefix); - }; - if (!use_inter_op_parallelism_) { - map_func = [map_func](IteratorContext* ctx, const string& prefix, - std::vector args, - std::vector* out_tensors, - StatusCallback done) { - (*ctx->runner())(std::bind(map_func, ctx, prefix, std::move(args), - 
out_tensors, std::move(done))); - }; - } - } else { - std::vector can_move = ComputeMoveVector(indices); - map_func = [indices, can_move](IteratorContext* ctx, const string& prefix, - std::vector args, - std::vector* out_tensors, - StatusCallback done) { - std::map counts; - for (size_t i = 0; i < indices.size(); ++i) { - if (can_move[i]) { - out_tensors->push_back(std::move(args[indices[i]])); - } else { - out_tensors->push_back(args[indices[i]]); - } - } - done(Status::OK()); - }; - } - *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_, output_shapes_, use_inter_op_parallelism_, - std::move(captured_func), std::move(map_func)); + std::move(captured_func)); } private: @@ -110,8 +69,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { const DataTypeVector& output_types, const std::vector& output_shapes, bool use_inter_op_parallelism, - std::unique_ptr captured_func, - ParallelMapIteratorFunction map_func) + std::unique_ptr captured_func) : DatasetBase(DatasetContext(ctx)), input_(input), func_(func), @@ -119,8 +77,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { output_types_(output_types), output_shapes_(output_shapes), use_inter_op_parallelism_(use_inter_op_parallelism), - captured_func_(std::move(captured_func)), - map_func_(std::move(map_func)) { + captured_func_(std::move(captured_func)) { input_->Ref(); } @@ -132,9 +89,26 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { return captured_func_->Instantiate(ctx); }; - return NewParallelMapIterator( - {this, strings::StrCat(prefix, "::ParallelMap")}, input_, - std::move(init_func), map_func_, num_parallel_calls_); + const string& new_prefix = strings::StrCat(prefix, "::ParallelMap"); + ParallelMapIteratorFunction map_func = + [this, new_prefix](IteratorContext* ctx, + std::vector input_element, + std::vector* result, StatusCallback done) { + captured_func_->RunAsync(ctx, std::move(input_element), result, + std::move(done), new_prefix); + }; + if 
(!use_inter_op_parallelism_) { + map_func = [map_func]( + IteratorContext* ctx, std::vector input_element, + std::vector* result, StatusCallback done) { + (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element), + result, std::move(done))); + }; + } + + return NewParallelMapIterator({this, new_prefix}, input_, + std::move(init_func), std::move(map_func), + num_parallel_calls_); } const DataTypeVector& output_dtypes() const override { @@ -202,7 +176,6 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { const std::vector output_shapes_; const bool use_inter_op_parallelism_; const std::unique_ptr captured_func_; - const ParallelMapIteratorFunction map_func_; }; DataTypeVector output_types_; diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc index ebf41925c9..13bd4b6036 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.cc +++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/cpu_info.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -180,7 +179,7 @@ class ParallelMapIterator : public DatasetBaseIterator { void EnsureRunnerThreadStarted(IteratorContext* ctx) EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { - auto ctx_copy = std::make_shared(*ctx); + std::shared_ptr ctx_copy(new IteratorContext(*ctx)); runner_thread_.reset(ctx->env()->StartThread( {}, "runner_thread", std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy))); @@ -209,15 +208,15 @@ class ParallelMapIterator : public DatasetBaseIterator { return; } + // Call `func_(input_element)`, store the result in `result->return_values`, + // and notify `result->notification` to unblock a consumer. 
auto done = [this, result](Status status) { result->status.Update(status); CallCompleted(result); }; - // Apply the map function on `input_element`, storing the result in - // `result->return_values`, and invoking `done` when finished. - map_func_(ctx.get(), prefix(), std::move(input_element), - &result->return_values, std::move(done)); + map_func_(ctx.get(), std::move(input_element), &result->return_values, + std::move(done)); } Status ProcessResult(const std::shared_ptr& result, @@ -350,9 +349,9 @@ std::unique_ptr NewParallelMapIterator( const DatasetBase* input_dataset, std::function init_func, ParallelMapIteratorFunction map_func, int32 num_parallel_calls) { - return MakeUnique( - params, input_dataset, std::move(init_func), std::move(map_func), - num_parallel_calls); + return std::unique_ptr( + new ParallelMapIterator(params, input_dataset, std::move(init_func), + std::move(map_func), num_parallel_calls)); } } // namespace data diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h index 813f13c9e4..dc26c5cf25 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.h +++ b/tensorflow/core/kernels/data/parallel_map_iterator.h @@ -30,7 +30,7 @@ namespace data { // 3. A `std::vector*` to which the function will write the result. // 4. A `StatusCallback` that should be invoked when the function is complete. 
using ParallelMapIteratorFunction = - std::function, + std::function, std::vector*, StatusCallback)>; // Returns a new iterator that applies `map_func` to the elements of diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc index 7de5ea8860..1d1a717062 100644 --- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc +++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc @@ -182,7 +182,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - auto map_fn = [this](IteratorContext* ctx, const string& prefix, + auto map_fn = [this](IteratorContext* ctx, std::vector input_element, std::vector* result, StatusCallback done) { (*ctx->runner())([this, ctx, input_element, result, done]() { diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py index 0703955fd4..afd0fc3abf 100644 --- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py @@ -332,26 +332,6 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): for _ in range(10): self.assertAllEqual([element for _ in range(10)], sess.run(get_next)) - @parameterized.named_parameters( - ("Identity", None, lambda x: x, None), - ("Replicate", None, lambda x: (x, x), None), - ("Swap", (None, None), lambda x, y: (y, x), None), - ("Project", (None, None), lambda x, y: x, None), - ) - def testShortCircuit(self, structure, map_fn, num_parallel_calls): - dataset = self.structuredDataset(structure).repeat().apply( - batching.map_and_batch(map_fn, batch_size=10)) - get_next = dataset.make_one_shot_iterator().get_next() - - with self.cached_session() as sess: - if isinstance(structure, tuple): - expected = map_fn( - 
*sess.run(self.structuredElement(structure, shape=[10]))) - else: - expected = map_fn( - sess.run(self.structuredElement(structure, shape=[10]))) - self.assertAllEqual(expected, sess.run(get_next)) - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py index a0c6b37a6d..6b7afafa5d 100644 --- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py @@ -156,7 +156,7 @@ class FilterDatasetTest(test_base.DatasetTestBase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testShortCircuit(self): + def testReturnComponent(self): iterator = ( dataset_ops.Dataset.zip( (dataset_ops.Dataset.range(10), diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py index 6efbe31ca1..0c372ebb10 100644 --- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py @@ -622,7 +622,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): sess.run(init_op) for i in range(10): actual = sess.run(get_next) - self.assertIsInstance(actual, sparse_tensor.SparseTensorValue) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) self.assertSparseValuesEqual(actual, _sparse(i)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -649,7 +649,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): sess.run(init_op) for i in range(10): actual = sess.run(get_next) - self.assertIsInstance(actual, sparse_tensor.SparseTensorValue) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval()) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -783,57 +783,19 @@ class 
MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertTrue(all(tids[0] == tid for tid in tids)) # pylint: enable=g-long-lambda - @parameterized.named_parameters( - ("SequentialIdentity", None, lambda x: x, None), - ("SequentialReplicate", None, lambda x: (x, x), None), - ("SequentialSwap", (None, None), lambda x, y: (y, x), None), - ("SequentialProject", (None, None), lambda x, y: x, None), - ("ParallelIdentity", None, lambda x: x, 10), - ("ParallelReplicate", None, lambda x: (x, x), 10), - ("ParallelSwap", (None, None), lambda x, y: (y, x), 10), - ("ParallelProject", (None, None), lambda x, y: x, 10), - ) - def testShortCircuit(self, structure, map_fn, num_parallel_calls): - dataset = self.structuredDataset(structure).repeat().map( - map_fn, num_parallel_calls=num_parallel_calls) - get_next = dataset.make_one_shot_iterator().get_next() - - with self.cached_session() as sess: - if isinstance(structure, tuple): - expected = map_fn(*sess.run(self.structuredElement(structure))) - else: - expected = map_fn(sess.run(self.structuredElement(structure))) - self.assertEqual(expected, sess.run(get_next)) - class MapDatasetBenchmark(test.Benchmark): def benchmarkChainOfMaps(self): chain_lengths = [0, 1, 2, 5, 10, 20, 50] for chain_length in chain_lengths: - for mode in ["general", "single-threaded", "short-circuit"]: - if mode == "general": - map_fn = lambda x: x + 1 - use_inter_op_parallelism = True - print_label = "" - benchmark_label = "" - if mode == "single-threaded": - map_fn = lambda x: x + 1 - use_inter_op_parallelism = False - print_label = " (single threaded mode)" - benchmark_label = "_single_threaded" - if mode == "short-circuit": - map_fn = lambda x: x - use_inter_op_parallelism = True # should not have any significance - print_label = " (short circuit mode)" - benchmark_label = "_short_circuit" - + for use_inter_op_parallelism in [False, True]: with ops.Graph().as_default(): dataset = dataset_ops.Dataset.from_tensors(0).repeat(None) for _ 
in range(chain_length): dataset = dataset_ops.MapDataset( dataset, - map_fn, + lambda x: x, use_inter_op_parallelism=use_inter_op_parallelism) iterator = dataset.make_one_shot_iterator() next_element = iterator.get_next() @@ -851,39 +813,25 @@ class MapDatasetBenchmark(test.Benchmark): median_wall_time = np.median(deltas) / 100 print("Map dataset chain length%s: %d Median wall time: %f" % - (print_label, chain_length, median_wall_time)) + (" (single threaded mode)" if not use_inter_op_parallelism + else "", chain_length, median_wall_time)) self.report_benchmark( iters=1000, wall_time=median_wall_time, name="benchmark_map_dataset_chain_latency_%d%s" % - (chain_length, benchmark_label)) + (chain_length, "_single_threaded" + if not use_inter_op_parallelism else "")) def benchmarkMapFanOut(self): fan_outs = [1, 2, 5, 10, 20, 50, 100] for fan_out in fan_outs: - for mode in ["general", "single-threaded", "short-circuit"]: - if mode == "general": - map_fn = lambda *xs: [x + 1 for x in xs] - use_inter_op_parallelism = True - print_label = "" - benchmark_label = "" - if mode == "single-threaded": - map_fn = lambda *xs: [x + 1 for x in xs] - use_inter_op_parallelism = False - print_label = " (single threaded mode)" - benchmark_label = "_single_threaded" - if mode == "short-circuit": - map_fn = lambda *xs: xs - use_inter_op_parallelism = True # should not have any significance - print_label = " (short circuit mode)" - benchmark_label = "_short_circuit" - + for use_inter_op_parallelism in [False, True]: with ops.Graph().as_default(): dataset = dataset_ops.Dataset.from_tensors( tuple(0 for _ in range(fan_out))).repeat(None) dataset = dataset_ops.MapDataset( dataset, - map_fn, + lambda *xs: xs, use_inter_op_parallelism=use_inter_op_parallelism) iterator = dataset.make_one_shot_iterator() next_element = iterator.get_next() @@ -901,12 +849,14 @@ class MapDatasetBenchmark(test.Benchmark): median_wall_time = np.median(deltas) / 100 print("Map dataset fan out%s: %d Median wall time: 
%f" % - (print_label, fan_out, median_wall_time)) + (" (single threaded mode)" if not use_inter_op_parallelism + else "", fan_out, median_wall_time)) self.report_benchmark( iters=1000, wall_time=median_wall_time, - name="benchmark_map_dataset_fan_out_%d%s" % (fan_out, - benchmark_label)) + name="benchmark_map_dataset_fan_out_%d%s" % + (fan_out, "_single_threaded" + if not use_inter_op_parallelism else "")) if __name__ == "__main__": diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py index b73a94e683..b730e10949 100644 --- a/tensorflow/python/data/kernel_tests/test_base.py +++ b/tensorflow/python/data/kernel_tests/test_base.py @@ -19,13 +19,10 @@ from __future__ import print_function import re -from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.eager import context -from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -110,29 +107,3 @@ class DatasetTestBase(test.TestCase): with self.assertRaisesRegexp(exception_class, re.escape(expected_message)): self.evaluate(next2()) - - def structuredDataset(self, structure, shape=None, dtype=dtypes.int64): - """Returns a singleton dataset with the given structure.""" - if shape is None: - shape = [] - if structure is None: - return dataset_ops.Dataset.from_tensors( - array_ops.zeros(shape, dtype=dtype)) - else: - return dataset_ops.Dataset.zip( - tuple([ - self.structuredDataset(substructure, shape, dtype) - for substructure in structure - ])) - - def structuredElement(self, structure, shape=None, dtype=dtypes.int64): - """Returns an element with the given structure.""" - if shape is None: - shape = [] - if structure is None: - return array_ops.zeros(shape, dtype=dtype) - else: - return tuple([ - 
self.structuredElement(substructure, shape, dtype) - for substructure in structure - ]) -- GitLab From 07921022ddc68aacbf210acc62545a90e3091fb1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 5 Oct 2018 14:57:15 -0700 Subject: [PATCH 028/411] Add deprecation call-out for tf_mobile PiperOrigin-RevId: 215971335 --- .../lite/g3doc/tfmobile/android_build.md | 18 +++++++++++++++++- .../contrib/lite/g3doc/tfmobile/index.md | 18 +++++++++++++++++- .../contrib/lite/g3doc/tfmobile/ios_build.md | 18 +++++++++++++++++- .../lite/g3doc/tfmobile/linking_libs.md | 18 +++++++++++++++++- .../contrib/lite/g3doc/tfmobile/optimizing.md | 18 +++++++++++++++++- .../lite/g3doc/tfmobile/prepare_models.md | 18 +++++++++++++++++- 6 files changed, 102 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md index b0f32a8d6c..2eb776d10c 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md @@ -1,6 +1,22 @@ - # Building TensorFlow on Android +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ To get you started working with TensorFlow on Android, we'll walk through two ways to build our TensorFlow mobile demos and deploying them on an Android device. The first is Android Studio, which lets you build and deploy in an diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/index.md b/tensorflow/contrib/lite/g3doc/tfmobile/index.md index 49ad35d4e6..15f0fd3961 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/index.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/index.md @@ -1,6 +1,22 @@ - # Overview +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ TensorFlow was designed to be a good deep learning solution for mobile platforms. Currently we have two solutions for deploying machine learning applications on mobile and embedded devices: TensorFlow for Mobile and diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md index be8b4100c8..d922907cdc 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md @@ -1,6 +1,22 @@ - # Building TensorFlow on iOS +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ ## Using CocoaPods The simplest way to get started with TensorFlow on iOS is using the CocoaPods diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md index 4d4bb3bc08..fd0e322c93 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md @@ -1,6 +1,22 @@ - # Integrating TensorFlow libraries +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ Once you have made some progress on a model that addresses the problem you’re trying to solve, it’s important to test it out inside your application immediately. There are often unexpected differences between your training data diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md index 7436594fd8..59ff8e774c 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md @@ -1,6 +1,22 @@ - # Optimizing for mobile +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ There are some special issues that you have to deal with when you’re trying to ship on mobile or embedded devices, and you’ll need to think about these as you’re developing your model. diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md index d1c67d4c61..1d373251dd 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md @@ -1,6 +1,22 @@ - # Preparing models for mobile deployment +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ The requirements for storing model information during training are very different from when you want to release it as part of a mobile app. This section covers the tools involved in converting from a training model to something -- GitLab From 1e104d80826fed95f9fad6f07f68e35cae3527b2 Mon Sep 17 00:00:00 2001 From: Geoffrey Irving Date: Wed, 19 Sep 2018 09:33:19 -0700 Subject: [PATCH 029/411] Expand stateless random generators to match their stateful cousins stateless_random_uniform now take minval+maxval and handles ints, and stateless_normal/stateless_truncated_normal take mean+stddev. Additionally, all of the stateless functions now have proper doc strings. This is step one of moving stateless random numbers out of contrib. --- tensorflow/contrib/stateless/BUILD | 5 +- tensorflow/contrib/stateless/__init__.py | 9 +- .../kernel_tests/stateless_random_ops_test.py | 156 ++++++------- .../contrib/stateless/python/stateless_ops.py | 214 ++++++++++++++++++ .../api_def_StatelessRandomUniformInt.pbtxt | 46 ++++ tensorflow/core/kernels/random_op.cc | 34 +-- .../core/kernels/stateless_random_ops.cc | 155 ++++++++----- tensorflow/core/ops/stateless_random_ops.cc | 53 +++-- 8 files changed, 491 insertions(+), 181 deletions(-) create mode 100644 tensorflow/contrib/stateless/python/stateless_ops.py create mode 100644 tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD index a217397c1a..e9ddec8889 100644 --- a/tensorflow/contrib/stateless/BUILD +++ b/tensorflow/contrib/stateless/BUILD @@ -11,7 +11,10 @@ load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") py_library( name = "stateless", - srcs = ["__init__.py"], + srcs = [ + "__init__.py", + "python/stateless_ops.py", + ], srcs_version = "PY2AND3", deps = [ "//tensorflow/python:framework_ops", diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py index 
fe23fe0dd8..30d0a7ab6a 100644 --- a/tensorflow/contrib/stateless/__init__.py +++ b/tensorflow/contrib/stateless/__init__.py @@ -32,16 +32,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import ops - # pylint: disable=wildcard-import -from tensorflow.python.ops.gen_stateless_random_ops import * +from tensorflow.contrib.stateless.python.stateless_ops import * from tensorflow.python.util.all_util import remove_undocumented -ops.NotDifferentiable("StatelessMultinomial") -ops.NotDifferentiable("StatelessRandomNormal") -ops.NotDifferentiable("StatelessRandomUniform") -ops.NotDifferentiable("StatelessTruncatedNormal") - remove_undocumented(__name__) diff --git a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py index d724a5c014..c0c1430d84 100644 --- a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py +++ b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools + import numpy as np from tensorflow.contrib import stateless from tensorflow.python.framework import constant_op @@ -27,10 +29,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import random_ops from tensorflow.python.platform import test -CASES = [(stateless.stateless_random_uniform, random_ops.random_uniform), - (stateless.stateless_random_normal, random_ops.random_normal), - (stateless.stateless_truncated_normal, random_ops.truncated_normal)] - def invert_philox(key, value): """Invert the Philox bijection.""" @@ -51,96 +49,102 @@ def invert_philox(key, value): class StatelessOpsTest(test.TestCase): - def testMatchStateful(self): + def _test_match(self, cases): # Stateless ops should be the 
same as stateful ops on the first call # after seed scrambling. + cases = tuple(cases) key = 0x3ec8f720, 0x02461e29 for seed in (7, 17), (11, 5), (2, 3): preseed = invert_philox(key, (seed[0], 0, seed[1], 0)).astype(np.uint64) preseed = preseed[::2] | preseed[1::2] << 32 random_seed.set_random_seed(seed[0]) with self.test_session(use_gpu=True): - for stateless_op, stateful_op in CASES: - for shape in (), (3,), (2, 5): - stateful = stateful_op(shape, seed=seed[1]) - pure = stateless_op(shape, seed=preseed) - self.assertAllEqual(stateful.eval(), pure.eval()) + for stateless_op, stateful_op in cases: + stateful = stateful_op(seed=seed[1]) + pure = stateless_op(seed=preseed) + self.assertAllEqual(stateful.eval(), pure.eval()) - def testDeterminism(self): + def _test_determinism(self, cases): # Stateless values should be equal iff the seeds are equal (roughly) + cases = tuple(cases) with self.test_session(use_gpu=True): for seed_type in [dtypes.int32, dtypes.int64]: seed_t = array_ops.placeholder(seed_type, shape=[2]) seeds = [(x, y) for x in range(5) for y in range(5)] * 3 - for stateless_op, _ in CASES: - for shape in (), (3,), (2, 5): - pure = stateless_op(shape, seed=seed_t) - values = [(seed, pure.eval(feed_dict={seed_t: seed})) - for seed in seeds] - for s0, v0 in values: - for s1, v1 in values: - self.assertEqual(s0 == s1, np.all(v0 == v1)) - - def testShapeType(self): - with self.test_session(use_gpu=True): - for shape_dtype in [dtypes.int32, dtypes.int64]: - seed_t = array_ops.placeholder(dtypes.int64, shape=[2]) - seeds = [(x, y) for x in range(5) for y in range(5)] * 3 - for stateless_op, _ in CASES: - for shape in (), (3,), (2, 5): - pure = stateless_op(constant_op.constant(shape, dtype=shape_dtype), - seed=seed_t) - values = [(seed, pure.eval(feed_dict={seed_t: seed})) - for seed in seeds] - for s0, v0 in values: - for s1, v1 in values: - self.assertEqual(s0 == s1, np.all(v0 == v1)) - - def testMatchStatefulMultinomial(self): - # Stateless ops should be the 
same as stateful ops on the first call - # after seed scrambling. - key = 0x3ec8f720, 0x02461e29 - num_samples = 4 - for logits_dtype in np.float16, np.float32, np.float64: - for output_dtype in dtypes.int32, dtypes.int64: - for seed in (7, 17), (11, 5), (2, 3): - preseed = invert_philox(key, - (seed[0], 0, seed[1], 0)).astype(np.uint64) - preseed = preseed[::2] | preseed[1::2] << 32 - random_seed.set_random_seed(seed[0]) - with self.test_session(use_gpu=True): - for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2], - [0.25, 0.75]]): - logits_t = constant_op.constant(logits, dtype=logits_dtype) - stateful = random_ops.multinomial( - logits_t, - num_samples, - seed=seed[1], - output_dtype=output_dtype) - pure = stateless.stateless_multinomial( - logits_t, - num_samples, - seed=preseed, - output_dtype=output_dtype) - self.assertAllEqual(stateful.eval(), pure.eval()) + for stateless_op, _ in cases: + pure = stateless_op(seed=seed_t) + values = [(seed, pure.eval(feed_dict={seed_t: seed})) + for seed in seeds] + for s0, v0 in values: + for s1, v1 in values: + self.assertEqual(s0 == s1, np.all(v0 == v1)) - def testDeterminismMultinomial(self): - # Stateless values should be equal iff the seeds are equal (roughly) + def _float_cases(self, shape_dtypes=(None,)): + float_cases = ( + # Uniform distribution, with and without range + (stateless.stateless_random_uniform, random_ops.random_uniform, {}), + (stateless.stateless_random_uniform, random_ops.random_uniform, + dict(minval=2.2, maxval=7.1)), + # Normal distribution, with and without mean+stddev + (stateless.stateless_random_normal, random_ops.random_normal, {}), + (stateless.stateless_random_normal, random_ops.random_normal, + dict(mean=2, stddev=3)), + # Truncated normal distribution, with and without mean+stddev + (stateless.stateless_truncated_normal, random_ops.truncated_normal, {}), + (stateless.stateless_truncated_normal, random_ops.truncated_normal, + dict(mean=3, stddev=4)), + ) + for dtype in 
dtypes.float16, dtypes.float32, dtypes.float64: + for shape_dtype in shape_dtypes: + for shape in (), (3,), (2, 5): + if shape_dtype is not None: + shape = constant_op.constant(shape, dtype=shape_dtype) + for stateless_op, stateful_op, kwds in float_cases: + kwds = dict(shape=shape, dtype=dtype, **kwds) + yield (functools.partial(stateless_op, **kwds), + functools.partial(stateful_op, **kwds)) + + def _int_cases(self, shape_dtypes=(None,)): + for shape_dtype in shape_dtypes: + for shape in (), (3,), (2, 5): + if shape_dtype is not None: + shape = constant_op.constant(shape, dtype=shape_dtype) + for dtype in dtypes.int32, dtypes.int64: + kwds = dict(minval=2, maxval=11111, dtype=dtype, shape=shape) + yield (functools.partial(stateless.stateless_random_uniform, **kwds), + functools.partial(random_ops.random_uniform, **kwds)) + + def _multinomial_cases(self): num_samples = 10 - with self.test_session(use_gpu=True): - for seed_type in [dtypes.int32, dtypes.int64]: - seed_t = array_ops.placeholder(seed_type, shape=[2]) - seeds = [(x, y) for x in range(5) for y in range(5)] * 3 + for logits_dtype in np.float16, np.float32, np.float64: + for output_dtype in dtypes.int32, dtypes.int64: for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2], [0.25, 0.75]]): - pure = stateless.stateless_multinomial( - logits, num_samples, seed=seed_t) - values = [ - (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds - ] - for s0, v0 in values: - for s1, v1 in values: - self.assertEqual(s0 == s1, np.all(v0 == v1)) + kwds = dict(logits=constant_op.constant(logits, dtype=logits_dtype), + num_samples=num_samples, + output_dtype=output_dtype) + yield (functools.partial(stateless.stateless_multinomial, **kwds), + functools.partial(random_ops.multinomial, **kwds)) + + def testMatchFloat(self): + self._test_match(self._float_cases()) + + def testMatchInt(self): + self._test_match(self._int_cases()) + + def testMatchMultinomial(self): + self._test_match(self._multinomial_cases()) 
+ + def testDeterminismFloat(self): + self._test_determinism(self._float_cases( + shape_dtypes=(dtypes.int32, dtypes.int64))) + + def testDeterminismInt(self): + self._test_determinism(self._int_cases( + shape_dtypes=(dtypes.int32, dtypes.int64))) + + def testDeterminismMultinomial(self): + self._test_determinism(self._multinomial_cases()) if __name__ == '__main__': diff --git a/tensorflow/contrib/stateless/python/stateless_ops.py b/tensorflow/contrib/stateless/python/stateless_ops.py new file mode 100644 index 0000000000..db9b7a87f2 --- /dev/null +++ b/tensorflow/contrib/stateless/python/stateless_ops.py @@ -0,0 +1,214 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Stateless random ops which take seed as a tensor input.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import gen_stateless_random_ops + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import math_ops + +ops.NotDifferentiable("StatelessMultinomial") +ops.NotDifferentiable("StatelessRandomNormal") +ops.NotDifferentiable("StatelessRandomUniform") +ops.NotDifferentiable("StatelessRandomUniformInt") +ops.NotDifferentiable("StatelessTruncatedNormal") + + +def stateless_random_uniform(shape, + seed, + minval=0, + maxval=None, + dtype=dtypes.float32, + name=None): + """Outputs deterministic pseudorandom values from a uniform distribution. + + This is a stateless version of `tf.random_uniform`: if run twice with the + same seeds, it will produce the same pseudorandom numbers. The output is + consistent across multiple runs on the same hardware (and between CPU + and GPU), but may change between versions of TensorFlow or on non-CPU/GPU + hardware. + + The generated values follow a uniform distribution in the range + `[minval, maxval)`. The lower bound `minval` is included in the range, while + the upper bound `maxval` is excluded. + + For floats, the default range is `[0, 1)`. For ints, at least `maxval` must + be specified explicitly. + + In the integer case, the random integers are slightly biased unless + `maxval - minval` is an exact power of two. The bias is small for values of + `maxval - minval` significantly smaller than the range of the output (either + `2**32` or `2**64`). + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. + seed: A shape [2] integer Tensor of seeds to the random number generator. 
+ minval: A 0-D Tensor or Python value of type `dtype`. The lower bound on the + range of random values to generate. Defaults to 0. + maxval: A 0-D Tensor or Python value of type `dtype`. The upper bound on + the range of random values to generate. Defaults to 1 if `dtype` is + floating point. + dtype: The type of the output: `float16`, `float32`, `float64`, `int32`, + or `int64`. + name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random uniform values. + + Raises: + ValueError: If `dtype` is integral and `maxval` is not specified. + """ + dtype = dtypes.as_dtype(dtype) + if dtype not in (dtypes.float16, dtypes.bfloat16, dtypes.float32, + dtypes.float64, dtypes.int32, dtypes.int64): + raise ValueError("Invalid dtype %r" % dtype) + if maxval is None: + if dtype.is_integer: + raise ValueError("Must specify maxval for integer dtype %r" % dtype) + maxval = 1 + with ops.name_scope(name, "stateless_random_uniform", + [shape, seed, minval, maxval]) as name: + shape = random_ops._ShapeTensor(shape) # pylint: disable=protected-access + minval = ops.convert_to_tensor(minval, dtype=dtype, name="min") + maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max") + if dtype.is_integer: + return gen_stateless_random_ops.stateless_random_uniform_int( + shape, seed=seed, minval=minval, maxval=maxval, name=name) + else: + rnd = gen_stateless_random_ops.stateless_random_uniform( + shape, seed=seed, dtype=dtype) + return math_ops.add(rnd * (maxval - minval), minval, name=name) + + +def stateless_random_normal(shape, + seed, + mean=0.0, + stddev=1.0, + dtype=dtypes.float32, + name=None): + """Outputs deterministic pseudorandom values from a normal distribution. + + This is a stateless version of `tf.random_normal`: if run twice with the + same seeds, it will produce the same pseudorandom numbers. 
The output is + consistent across multiple runs on the same hardware (and between CPU + and GPU), but may change between versions of TensorFlow or on non-CPU/GPU + hardware. + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. + seed: A shape [2] integer Tensor of seeds to the random number generator. + mean: A 0-D Tensor or Python value of type `dtype`. The mean of the normal + distribution. + stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation + of the normal distribution. + dtype: The type of the output. + name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random normal values. + """ + with ops.name_scope(name, "stateless_random_normal", + [shape, seed, mean, stddev]) as name: + shape = random_ops._ShapeTensor(shape) # pylint: disable=protected-access + mean = ops.convert_to_tensor(mean, dtype=dtype, name="mean") + stddev = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev") + rnd = gen_stateless_random_ops.stateless_random_normal(shape, seed, dtype) + return math_ops.add(rnd * stddev, mean, name=name) + + +def stateless_truncated_normal(shape, + seed, + mean=0.0, + stddev=1.0, + dtype=dtypes.float32, + name=None): + """Outputs deterministic pseudorandom values, truncated normally distributed. + + This is a stateless version of `tf.truncated_normal`: if run twice with the + same seeds, it will produce the same pseudorandom numbers. The output is + consistent across multiple runs on the same hardware (and between CPU + and GPU), but may change between versions of TensorFlow or on non-CPU/GPU + hardware. + + The generated values follow a normal distribution with specified mean and + standard deviation, except that values whose magnitude is more than 2 standard + deviations from the mean are dropped and re-picked. + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. 
+ seed: A shape [2] integer Tensor of seeds to the random number generator. + mean: A 0-D Tensor or Python value of type `dtype`. The mean of the + truncated normal distribution. + stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation + of the normal distribution, before truncation. + dtype: The type of the output. + name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random truncated normal values. + """ + with ops.name_scope(name, "stateless_truncated_normal", + [shape, seed, mean, stddev]) as name: + shape = random_ops._ShapeTensor(shape) # pylint: disable=protected-access + mean = ops.convert_to_tensor(mean, dtype=dtype, name="mean") + stddev = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev") + rnd = gen_stateless_random_ops.stateless_truncated_normal( + shape, seed, dtype) + return math_ops.add(rnd * stddev, mean, name=name) + + +def stateless_multinomial(logits, + num_samples, + seed, + output_dtype=dtypes.int64, + name=None): + """Draws deterministic pseudorandom samples from a multinomial distribution. + + This is a stateless version of `tf.multinomial`: if run twice with the + same seeds, it will produce the same pseudorandom numbers. The output is + consistent across multiple runs on the same hardware (and between CPU + and GPU), but may change between versions of TensorFlow or on non-CPU/GPU + hardware. + + Example: + + ```python + # samples has shape [1, 5], where each value is either 0 or 1 with equal + # probability. + samples = tf.contrib.stateless.stateless_multinomial( + tf.log([[10., 10.]]), 5, seed=[7, 17]) + ``` + + Args: + logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice + `[i, :]` represents the unnormalized log-probabilities for all classes. + num_samples: 0-D. Number of independent samples to draw for each row slice. + seed: A shape [2] integer Tensor of seeds to the random number generator. + name: Optional name for the operation. 
+ output_dtype: integer type to use for the output. Defaults to int64. + + Returns: + The drawn samples of shape `[batch_size, num_samples]`. + """ + with ops.name_scope(name, "stateless_multinomial", [logits, seed]): + logits = ops.convert_to_tensor(logits, name="logits") + return gen_stateless_random_ops.stateless_multinomial( + logits, num_samples, seed, output_dtype=output_dtype) diff --git a/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt b/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt new file mode 100644 index 0000000000..b6a6dbdf54 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt @@ -0,0 +1,46 @@ +op { + graph_op_name: "StatelessRandomUniformInt" + visibility: HIDDEN + in_arg { + name: "shape" + description: <("T"), \ RandomGammaOp) -#define REGISTER_INT(IntType) \ - REGISTER_KERNEL_BUILDER(Name("RandomUniformInt") \ - .Device(DEVICE_CPU) \ - .HostMemory("shape") \ - .HostMemory("minval") \ - .HostMemory("maxval") \ - .TypeConstraint("Tout"), \ +#define REGISTER_INT(IntType) \ + template struct functor::FillPhiloxRandom< \ + CPUDevice, random::UniformDistribution>; \ + REGISTER_KERNEL_BUILDER(Name("RandomUniformInt") \ + .Device(DEVICE_CPU) \ + .HostMemory("shape") \ + .HostMemory("minval") \ + .HostMemory("maxval") \ + .TypeConstraint("Tout"), \ RandomUniformIntOp); TF_CALL_half(REGISTER); @@ -538,14 +540,16 @@ TF_CALL_int64(REGISTER_INT); random::TruncatedNormalDistribution< \ random::SingleSampleAdapter, TYPE>>); -#define REGISTER_INT(IntType) \ - REGISTER_KERNEL_BUILDER(Name("RandomUniformInt") \ - .Device(DEVICE_GPU) \ - .HostMemory("shape") \ - .HostMemory("minval") \ - .HostMemory("maxval") \ - .TypeConstraint("T") \ - .TypeConstraint("Tout"), \ +#define REGISTER_INT(IntType) \ + template struct functor::FillPhiloxRandom< \ + GPUDevice, random::UniformDistribution>; \ + REGISTER_KERNEL_BUILDER(Name("RandomUniformInt") \ + .Device(DEVICE_GPU) \ + 
.HostMemory("shape") \ + .HostMemory("minval") \ + .HostMemory("maxval") \ + .TypeConstraint("T") \ + .TypeConstraint("Tout"), \ RandomUniformIntOp); TF_CALL_half(REGISTER); diff --git a/tensorflow/core/kernels/stateless_random_ops.cc b/tensorflow/core/kernels/stateless_random_ops.cc index eab176c7fb..925f5291a6 100644 --- a/tensorflow/core/kernels/stateless_random_ops.cc +++ b/tensorflow/core/kernels/stateless_random_ops.cc @@ -113,74 +113,109 @@ class StatelessRandomOp : public StatelessRandomOpBase { } }; -#define REGISTER(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("StatelessRandomUniform") \ - .Device(DEVICE_CPU) \ - .HostMemory("shape") \ - .TypeConstraint("dtype"), \ - StatelessRandomOp >); \ - REGISTER_KERNEL_BUILDER( \ - Name("StatelessRandomNormal") \ - .Device(DEVICE_CPU) \ - .HostMemory("shape") \ - .TypeConstraint("dtype"), \ - StatelessRandomOp >); \ - REGISTER_KERNEL_BUILDER( \ - Name("StatelessTruncatedNormal") \ - .Device(DEVICE_CPU) \ - .HostMemory("shape") \ - .TypeConstraint("dtype"), \ - StatelessRandomOp< \ - CPUDevice, \ - random::TruncatedNormalDistribution< \ - random::SingleSampleAdapter, TYPE> >); +template +class StatelessRandomUniformIntOp : public StatelessRandomOpBase { + public: + using StatelessRandomOpBase::StatelessRandomOpBase; -TF_CALL_half(REGISTER); -TF_CALL_float(REGISTER); -TF_CALL_double(REGISTER); + void Fill(OpKernelContext* context, random::PhiloxRandom random, + Tensor* output) override { + const Tensor& minval = context->input(2); + const Tensor& maxval = context->input(3); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(minval.shape()), + errors::InvalidArgument("minval must be 0-D, got shape ", + minval.shape().DebugString())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(maxval.shape()), + errors::InvalidArgument("maxval must be 0-D, got shape ", + maxval.shape().DebugString())); + + // Verify that minval < maxval. Note that we'll never reach this point for + // empty output. Zero impossible things are fine. 
+ const auto lo = minval.scalar()(); + const auto hi = maxval.scalar()(); + OP_REQUIRES( + context, lo < hi, + errors::InvalidArgument("Need minval < maxval, got ", lo, " >= ", hi)); + + // Build distribution + typedef random::UniformDistribution + Distribution; + Distribution dist(lo, hi); + + auto flat = output->flat(); + // Reuse the compute kernels from the stateful random ops + functor::FillPhiloxRandom()( + context, context->eigen_device(), random, flat.data(), + flat.size(), dist); + } +}; -#undef REGISTER +#define REGISTER(DEVICE, TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("StatelessRandomUniform") \ + .Device(DEVICE_##DEVICE) \ + .HostMemory("shape") \ + .HostMemory("seed") \ + .TypeConstraint("dtype"), \ + StatelessRandomOp >); \ + REGISTER_KERNEL_BUILDER( \ + Name("StatelessRandomNormal") \ + .Device(DEVICE_##DEVICE) \ + .HostMemory("shape") \ + .HostMemory("seed") \ + .TypeConstraint("dtype"), \ + StatelessRandomOp >); \ + REGISTER_KERNEL_BUILDER( \ + Name("StatelessTruncatedNormal") \ + .Device(DEVICE_##DEVICE) \ + .HostMemory("shape") \ + .HostMemory("seed") \ + .TypeConstraint("dtype"), \ + StatelessRandomOp< \ + DEVICE##Device, \ + random::TruncatedNormalDistribution< \ + random::SingleSampleAdapter, TYPE> >); + +#define REGISTER_INT(DEVICE, TYPE) \ + REGISTER_KERNEL_BUILDER(Name("StatelessRandomUniformInt") \ + .Device(DEVICE_##DEVICE) \ + .HostMemory("shape") \ + .HostMemory("seed") \ + .HostMemory("minval") \ + .HostMemory("maxval") \ + .TypeConstraint("dtype"), \ + StatelessRandomUniformIntOp); + +#define REGISTER_CPU(TYPE) REGISTER(CPU, TYPE) +#define REGISTER_GPU(TYPE) REGISTER(GPU, TYPE) +#define REGISTER_INT_CPU(TYPE) REGISTER_INT(CPU, TYPE) +#define REGISTER_INT_GPU(TYPE) REGISTER_INT(GPU, TYPE) + +TF_CALL_half(REGISTER_CPU); +TF_CALL_bfloat16(REGISTER_CPU); +TF_CALL_float(REGISTER_CPU); +TF_CALL_double(REGISTER_CPU); +TF_CALL_int32(REGISTER_INT_CPU); +TF_CALL_int64(REGISTER_INT_CPU); #if GOOGLE_CUDA -#define REGISTER(TYPE) \ - 
REGISTER_KERNEL_BUILDER( \ - Name("StatelessRandomUniform") \ - .Device(DEVICE_GPU) \ - .HostMemory("shape") \ - .HostMemory("seed") \ - .TypeConstraint("dtype"), \ - StatelessRandomOp >); \ - REGISTER_KERNEL_BUILDER( \ - Name("StatelessRandomNormal") \ - .Device(DEVICE_GPU) \ - .HostMemory("shape") \ - .HostMemory("seed") \ - .TypeConstraint("dtype"), \ - StatelessRandomOp >); \ - REGISTER_KERNEL_BUILDER( \ - Name("StatelessTruncatedNormal") \ - .Device(DEVICE_GPU) \ - .HostMemory("shape") \ - .HostMemory("seed") \ - .TypeConstraint("dtype"), \ - StatelessRandomOp< \ - GPUDevice, \ - random::TruncatedNormalDistribution< \ - random::SingleSampleAdapter, TYPE> >); +TF_CALL_half(REGISTER_GPU); +TF_CALL_float(REGISTER_GPU); +TF_CALL_double(REGISTER_GPU); +TF_CALL_int32(REGISTER_INT_GPU); +TF_CALL_int64(REGISTER_INT_GPU); -TF_CALL_half(REGISTER); -TF_CALL_float(REGISTER); -TF_CALL_double(REGISTER); +#endif // GOOGLE_CUDA #undef REGISTER - -#endif // GOOGLE_CUDA +#undef REGISTER_INT +#undef REGISTER_CPU +#undef REGISTER_GPU +#undef REGISTER_INT_CPU +#undef REGISTER_INT_GPU } // namespace diff --git a/tensorflow/core/ops/stateless_random_ops.cc b/tensorflow/core/ops/stateless_random_ops.cc index 742709fb18..f919a21d60 100644 --- a/tensorflow/core/ops/stateless_random_ops.cc +++ b/tensorflow/core/ops/stateless_random_ops.cc @@ -19,42 +19,55 @@ limitations under the License. 
namespace tensorflow { using shape_inference::DimensionHandle; +using shape_inference::InferenceContext; using shape_inference::ShapeHandle; -static Status StatelessShape(shape_inference::InferenceContext* context) { +static Status StatelessShape(InferenceContext* c) { // Check seed shape ShapeHandle seed; - TF_RETURN_IF_ERROR(context->WithRank(context->input(1), 1, &seed)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &seed)); DimensionHandle unused; - TF_RETURN_IF_ERROR(context->WithValue(context->Dim(seed, 0), 2, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(seed, 0), 2, &unused)); // Set output shape ShapeHandle out; - TF_RETURN_IF_ERROR(context->MakeShapeFromShapeTensor(0, &out)); - context->set_output(0, out); + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &out)); + c->set_output(0, out); return Status::OK(); } -#define REGISTER_STATELESS_OP(name) \ - REGISTER_OP(name) \ - .Input("shape: T") \ - .Input("seed: Tseed") \ - .Output("output: dtype") \ - .Attr("dtype: {half,float,double} = DT_FLOAT") \ - .Attr("T: {int32, int64} = DT_INT32") \ - .Attr("Tseed: {int32, int64} = DT_INT64") \ +#define REGISTER_STATELESS_OP(name) \ + REGISTER_OP(name) \ + .Input("shape: T") \ + .Input("seed: Tseed") \ + .Output("output: dtype") \ + .Attr("dtype: {half,bfloat16,float,double} = DT_FLOAT") \ + .Attr("T: {int32, int64} = DT_INT32") \ + .Attr("Tseed: {int32, int64} = DT_INT64") \ .SetShapeFn(StatelessShape) -// This op is exposed through contrib/stateless only. The interface may change. REGISTER_STATELESS_OP("StatelessRandomUniform"); - -// This op is exposed through contrib/stateless only. The interface may change. REGISTER_STATELESS_OP("StatelessRandomNormal"); - -// This op is exposed through contrib/stateless only. The interface may change. REGISTER_STATELESS_OP("StatelessTruncatedNormal"); -// This op is exposed through contrib/stateless only. The interface may change. 
+#undef REGISTER_STATELESS_OP + +REGISTER_OP("StatelessRandomUniformInt") + .Input("shape: T") + .Input("seed: Tseed") + .Input("minval: dtype") + .Input("maxval: dtype") + .Output("output: dtype") + .Attr("dtype: {int32, int64}") + .Attr("T: {int32, int64}") + .Attr("Tseed: {int32, int64} = DT_INT64") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + return StatelessShape(c); + }); + REGISTER_OP("StatelessMultinomial") .Input("logits: T") .Input("num_samples: int32") @@ -80,6 +93,4 @@ REGISTER_OP("StatelessMultinomial") return Status::OK(); }); -#undef REGISTER_STATELESS_OP - } // namespace tensorflow -- GitLab From c966b5eed60a570f2121cb84ddb4ece84c413719 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 5 Oct 2018 15:08:18 -0700 Subject: [PATCH 030/411] Add DistributionStrategy support to moving average APIs. Fixes #21405. PiperOrigin-RevId: 215973401 --- tensorflow/contrib/distribute/python/BUILD | 18 +++ .../distribute/python/moving_averages_test.py | 141 ++++++++++++++++++ tensorflow/python/training/moving_averages.py | 49 +++--- 3 files changed, 189 insertions(+), 19 deletions(-) create mode 100644 tensorflow/contrib/distribute/python/moving_averages_test.py diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 8267612236..76d5b59ce1 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -411,6 +411,24 @@ cuda_py_test( ], ) +cuda_py_test( + name = "moving_averages_test", + srcs = ["moving_averages_test.py"], + additional_deps = [ + ":combinations", + "@absl_py//absl/testing:parameterized", + "//tensorflow/python/eager:test", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:training", + "//tensorflow/python:variables", + ], + tags 
= [ + "no_pip", + ], +) + cuda_py_test( name = "optimizer_v2_test", srcs = ["optimizer_v2_test.py"], diff --git a/tensorflow/contrib/distribute/python/moving_averages_test.py b/tensorflow/contrib/distribute/python/moving_averages_test.py new file mode 100644 index 0000000000..119352ad91 --- /dev/null +++ b/tensorflow/contrib/distribute/python/moving_averages_test.py @@ -0,0 +1,141 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for training.moving_averages when using a DistributionStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variables +from tensorflow.python.training import moving_averages + + +all_combinations = combinations.combine( + distribution=[combinations.default_strategy, + combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu], + mode=["graph"]) + + +class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase): + + @combinations.generate(all_combinations) + def testTowerModeWithoutZeroDebias(self, distribution): + tower_id = [0] + + def tower_fn(): + var = variables.Variable([10.0, 11.0]) + val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]]) + tower_id[0] += 1 + decay = 0.25 + assign = moving_averages.assign_moving_average( + var, val, decay, zero_debias=False) + return var, assign + + with distribution.scope(), self.cached_session() as sess: + var, assign = distribution.call_for_each_tower(tower_fn) + variables.global_variables_initializer().run() + self.assertAllClose([10.0, 11.0], var.eval()) + sess.run(distribution.unwrap(assign)) + # Mean of val across calls to tower_fn(). 
+ average_val = [1.0 + 0.5 * (tower_id[0] - 1), + 2.0 - 0.5 * (tower_id[0] - 1)] + val_weight = 1.0 - 0.25 + self.assertAllClose( + [10.0 * 0.25 + average_val[0] * val_weight, + 11.0 * 0.25 + average_val[1] * val_weight], + var.eval()) + + @combinations.generate(all_combinations) + def testTowerMode(self, distribution): + tower_id = [0] + + def tower_fn(): + var = variables.Variable([0.0, 0.0]) + val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]]) + tower_id[0] += 1 + decay = 0.25 + assign = moving_averages.assign_moving_average(var, val, decay) + return var, assign.op + + with distribution.scope(), self.cached_session() as sess: + var, assign_op = distribution.call_for_each_tower(tower_fn) + variables.global_variables_initializer().run() + self.assertAllClose([0.0, 0.0], var.eval()) + sess.run(distribution.unwrap(assign_op)) + # Mean of val across calls to tower_fn(). + average_val = [1.0 + 0.5 * (tower_id[0] - 1), + 2.0 - 0.5 * (tower_id[0] - 1)] + self.assertAllClose(average_val, var.eval()) + + @combinations.generate(all_combinations) + def testCrossTowerWithoutZeroDebias(self, distribution): + with distribution.scope(), self.cached_session() as sess: + var = variables.Variable([10.0, 11.0]) + val = constant_op.constant([1.0, 2.0]) + decay = 0.25 + # NOTE(josh11b): We currently generate an error if val is a PerDevice value. + assign = moving_averages.assign_moving_average( + var, val, decay, zero_debias=False) + + variables.global_variables_initializer().run() + self.assertAllClose([10.0, 11.0], var.eval()) + sess.run(assign) + average_val = [1.0, 2.0] + val_weight = 1.0 - 0.25 + self.assertAllClose( + [10.0 * 0.25 + average_val[0] * val_weight, + 11.0 * 0.25 + average_val[1] * val_weight], + var.eval()) + # Also try assign.op. 
+ sess.run(assign.op) + orig_weight = 0.25 * 0.25 + val_weight = 1.0 - orig_weight + self.assertAllClose( + [10.0 * orig_weight + average_val[0] * val_weight, + 11.0 * orig_weight + average_val[1] * val_weight], + var.eval()) + + @combinations.generate(all_combinations) + def testCrossTower(self, distribution): + with distribution.scope(), self.cached_session() as sess: + var = variables.Variable([0.0, 0.0]) + val = array_ops.placeholder(dtypes.float32) + decay = 0.25 + # NOTE(josh11b): We currently generate an error if val is a PerDevice value. + assign = moving_averages.assign_moving_average(var, val, decay) + + variables.global_variables_initializer().run() + self.assertAllClose([0.0, 0.0], var.eval()) + sess.run(assign, feed_dict={val: [1.0, 2.0]}) + self.assertAllClose([1.0, 2.0], var.eval()) + + # Also try assign.op. + sess.run(assign.op, feed_dict={val: [10.0, 0.0]}) + self.assertAllClose( + [(1.0 * 0.25 + 10.0) / (1.0 * 0.25 + 1.0), + (2.0 * 0.25 + 0.0) / (1.0 * 0.25 + 1.0)], + var.eval()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/training/moving_averages.py b/tensorflow/python/training/moving_averages.py index 041266da3e..89bfcaf4ad 100644 --- a/tensorflow/python/training/moving_averages.py +++ b/tensorflow/python/training/moving_averages.py @@ -25,6 +25,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.training import distribution_strategy_context from tensorflow.python.training import slot_creator from tensorflow.python.util.tf_export import tf_export @@ -36,9 +37,8 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None): The moving average of 'variable' updated with 'value' is: variable * decay + value * (1 - decay) - The returned Operation sets 'variable' to the newly computed moving average. 
- - The new value of 'variable' can be set with the 'AssignSub' op as: + The returned Operation sets 'variable' to the newly computed moving average, + by performing this subtraction: variable -= (1 - decay) * (variable - value) Since variables that are initialized to a `0` value will be `0` biased, @@ -50,7 +50,7 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None): The names of the debias shadow variables, by default, include both the scope they were created in and the scope of the variables they debias. They are also - given a uniqifying-suffix. + given a uniquifying-suffix. E.g.: @@ -58,8 +58,8 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None): with tf.variable_scope('scope1'): with tf.variable_scope('scope2'): var = tf.get_variable('foo') - tf.assign_moving_average(var, 0.0, 1.0) - tf.assign_moving_average(var, 0.0, 0.9) + update_1 = tf.assign_moving_average(var, 0.0, 1.0) + update_2 = tf.assign_moving_average(var, 0.0, 0.9) # var.name: 'scope1/scope2/foo' # shadow var names: 'scope1/scope2/scope1/scope2/foo/biased' @@ -76,20 +76,33 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None): name: Optional name of the returned operation. Returns: - A reference to the input 'variable' tensor with the newly computed - moving average. + A tensor which if evaluated will compute and return the new moving average. 
""" + def update_fn(v, value, decay=decay): + decay = ops.convert_to_tensor(1.0 - decay, name="decay") + if decay.dtype != v.dtype.base_dtype: + decay = math_ops.cast(decay, v.dtype.base_dtype) + if zero_debias: + update_delta = _zero_debias(v, value, decay) + else: + update_delta = (v - value) * decay + return state_ops.assign_sub(v, update_delta, name=scope) + with ops.name_scope(name, "AssignMovingAvg", [variable, value, decay]) as scope: - with ops.colocate_with(variable): - decay = ops.convert_to_tensor(1.0 - decay, name="decay") - if decay.dtype != variable.dtype.base_dtype: - decay = math_ops.cast(decay, variable.dtype.base_dtype) - if zero_debias: - update_delta = _zero_debias(variable, value, decay) - else: - update_delta = (variable - value) * decay - return state_ops.assign_sub(variable, update_delta, name=scope) + tower_context = distribution_strategy_context.get_tower_context() + if tower_context: + # In a tower context, we update variable using the mean of value across + # towers. + def merge_fn(strategy, v, value): + value = strategy.reduce( + variable_scope.VariableAggregation.MEAN, value, v) + return strategy.update(v, update_fn, value) + + return tower_context.merge_call(merge_fn, variable, value) + else: + strategy = distribution_strategy_context.get_cross_tower_context() + return strategy.update(variable, update_fn, value) def weighted_moving_average(value, @@ -379,8 +392,6 @@ class ExponentialMovingAverage(object): Raises: TypeError: If the arguments are not an allowed type. - ValueError: If the moving average of one of the variables is already - being computed. """ # TODO(touts): op_scope if var_list is None: -- GitLab From 5ac6e1e4b8318bad2f2bc7e5a08a58a7ed31e4c6 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool Date: Fri, 5 Oct 2018 15:43:32 -0700 Subject: [PATCH 031/411] Removes the INTEL_MKL_ML_ONLY option from the CMakeLists build file since the main logic for INTEL_MKL_ML_ONLY is getting removed in PR#22783. 
#22783 PiperOrigin-RevId: 215978712 --- tensorflow/contrib/cmake/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index f675c135f4..60f53b8b75 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -352,9 +352,7 @@ if (tensorflow_ENABLE_MKL_SUPPORT) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn_copy_shared_to_destination) include_directories(${mkldnn_INCLUDE_DIRS}) - else (tensorflow_ENABLE_MKLDNN_SUPPORT) - add_definitions(-DINTEL_MKL_ML_ONLY) - endif() + endif(tensorflow_ENABLE_MKLDNN_SUPPORT) endif (tensorflow_ENABLE_MKL_SUPPORT) if (tensorflow_ENABLE_GPU) -- GitLab From 4aad5382f0e7148d8489d24d8355b828b3f7811b Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Fri, 5 Oct 2018 15:43:58 -0700 Subject: [PATCH 032/411] Internal change PiperOrigin-RevId: 215978771 --- tensorflow/contrib/lite/java/BUILD | 95 ++++++++++++++----- tensorflow/contrib/lite/java/aar_with_jni.bzl | 5 +- .../org/tensorflow/lite/TensorFlowLite.java | 20 +++- .../tensorflow/lite/InterpreterFlexTest.java | 46 +++++++++ .../org/tensorflow/lite/InterpreterTest.java | 14 +++ 5 files changed, 153 insertions(+), 27 deletions(-) create mode 100644 tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index 098ba7e773..e68cd26f81 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -11,6 +11,10 @@ load("//tensorflow/java:build_defs.bzl", "JAVACOPTS") load("//tensorflow/contrib/lite:build_def.bzl", "tflite_jni_binary") load("//tensorflow/contrib/lite/java:aar_with_jni.bzl", "aar_with_jni") +JAVA_SRCS = glob([ + "src/main/java/org/tensorflow/lite/*.java", +]) + # Building tensorflow-lite.aar including 4 variants of 
.so # To build an aar for release, run below command: # bazel build --cxxopt='--std=c++11' -c opt --fat_apk_cpu=x86,x86_64,arm64-v8a,armeabi-v7a \ @@ -20,28 +24,38 @@ aar_with_jni( android_library = ":tensorflowlite", ) +# EXPERIMENTAL: AAR target that supports TensorFlow op execution with TFLite. +aar_with_jni( + name = "tensorflow-lite-flex", + android_library = ":tensorflowlite_flex", +) + android_library( name = "tensorflowlite", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, + manifest = "AndroidManifest.xml", + visibility = ["//visibility:public"], + deps = [ + ":tensorflowlite_native", + "@org_checkerframework_qual", + ], +) + +# EXPERIMENTAL: Android target that supports TensorFlow op execution with TFLite. +android_library( + name = "tensorflowlite_flex", + srcs = JAVA_SRCS, manifest = "AndroidManifest.xml", visibility = ["//visibility:public"], deps = [ - ":tflite_runtime", + ":tensorflowlite_native_flex", "@org_checkerframework_qual", ], ) android_library( name = "tensorflowlite_java", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, visibility = ["//visibility:public"], deps = [ "@org_checkerframework_qual", @@ -50,16 +64,23 @@ android_library( java_library( name = "tensorflowlitelib", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, javacopts = JAVACOPTS, visibility = ["//visibility:public"], deps = [ ":libtensorflowlite_jni.so", - "//tensorflow/contrib/lite/java/src/main/native", + "@org_checkerframework_qual", + ], +) + +# EXPERIMENTAL: Java target that supports TensorFlow op execution with TFLite. 
+java_library( + name = "tensorflowlitelib_flex", + srcs = JAVA_SRCS, + javacopts = JAVACOPTS, + visibility = ["//visibility:public"], + deps = [ + ":libtensorflowlite_flex_jni.so", "@org_checkerframework_qual", ], ) @@ -72,7 +93,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.TensorFlowLiteTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -87,7 +107,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.DataTypeTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -110,7 +129,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -125,19 +143,37 @@ java_test( data = [ "src/testdata/add.bin", "src/testdata/mobilenet.tflite.bin", + "//tensorflow/contrib/lite:testdata/multi_add_flex.bin", ], javacopts = JAVACOPTS, tags = ["no_oss"], test_class = "org.tensorflow.lite.InterpreterTest", visibility = ["//visibility:private"], deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", ], ) +java_test( + name = "InterpreterFlexTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/InterpreterFlexTest.java"], + data = [ + "//tensorflow/contrib/lite:testdata/multi_add_flex.bin", + ], + javacopts = JAVACOPTS, + tags = ["no_oss"], + test_class = "org.tensorflow.lite.InterpreterFlexTest", + visibility = ["//visibility:private"], + deps = [ + ":tensorflowlitelib_flex", + "@com_google_truth", + "@junit", + ], +) + java_test( name = "TensorTest", size = "small", @@ -164,14 +200,29 @@ filegroup( ) cc_library( - name = "tflite_runtime", + name = "tensorflowlite_native", srcs = ["libtensorflowlite_jni.so"], visibility = ["//visibility:public"], ) +cc_library( + name = "tensorflowlite_native_flex", + srcs = ["libtensorflowlite_flex_jni.so"], + visibility = 
["//visibility:public"], +) + tflite_jni_binary( name = "libtensorflowlite_jni.so", deps = [ "//tensorflow/contrib/lite/java/src/main/native", ], ) + +# EXPERIMENTAL: Native target that supports TensorFlow op execution with TFLite. +tflite_jni_binary( + name = "libtensorflowlite_flex_jni.so", + deps = [ + "//tensorflow/contrib/lite/delegates/flex:delegate", + "//tensorflow/contrib/lite/java/src/main/native", + ], +) diff --git a/tensorflow/contrib/lite/java/aar_with_jni.bzl b/tensorflow/contrib/lite/java/aar_with_jni.bzl index 9d2aead266..360d622b1b 100644 --- a/tensorflow/contrib/lite/java/aar_with_jni.bzl +++ b/tensorflow/contrib/lite/java/aar_with_jni.bzl @@ -30,7 +30,10 @@ EOF # In some platforms we don't have an Android SDK/NDK and this target # can't be built. We need to prevent the build system from trying to # use the target in that case. - tags = ["manual"], + tags = [ + "manual", + "no_cuda_on_cpu_tap", + ], ) native.genrule( diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java index 711638a9f9..d5447b3bf8 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java @@ -18,7 +18,8 @@ package org.tensorflow.lite; /** Static utility methods loading the TensorFlowLite runtime. */ public final class TensorFlowLite { - private static final String LIBNAME = "tensorflowlite_jni"; + private static final String PRIMARY_LIBNAME = "tensorflowlite_jni"; + private static final String FALLBACK_LIBNAME = "tensorflowlite_flex_jni"; private TensorFlowLite() {} @@ -29,13 +30,24 @@ public final class TensorFlowLite { * Load the TensorFlowLite runtime C library. 
*/ static boolean init() { + Throwable primaryLibException; try { - System.loadLibrary(LIBNAME); + System.loadLibrary(PRIMARY_LIBNAME); return true; } catch (UnsatisfiedLinkError e) { - System.err.println("TensorFlowLite: failed to load native library: " + e.getMessage()); - return false; + primaryLibException = e; } + + try { + System.loadLibrary(FALLBACK_LIBNAME); + return true; + } catch (UnsatisfiedLinkError e) { + // If the fallback fails, log the error for the primary load instead. + System.err.println( + "TensorFlowLite: failed to load native library: " + primaryLibException.getMessage()); + } + + return false; } static { diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java new file mode 100644 index 0000000000..2791c3864b --- /dev/null +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java @@ -0,0 +1,46 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +package org.tensorflow.lite; + +import static com.google.common.truth.Truth.assertThat; + +import java.io.File; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Unit tests for {@link org.tensorflow.lite.Interpreter} that validate execution with models that + * have TensorFlow ops. + */ +@RunWith(JUnit4.class) +public final class InterpreterFlexTest { + + private static final File FLEX_MODEL_FILE = + new File("tensorflow/contrib/lite/testdata/multi_add_flex.bin"); + + /** Smoke test validating that flex model loading works when the flex delegate is linked. */ + @Test + public void testFlexModel() throws Exception { + try (Interpreter interpreter = new Interpreter(FLEX_MODEL_FILE)) { + assertThat(interpreter.getInputTensorCount()).isEqualTo(4); + assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.FLOAT32); + assertThat(interpreter.getOutputTensorCount()).isEqualTo(4); + assertThat(interpreter.getOutputTensor(0).dataType()).isEqualTo(DataType.FLOAT32); + interpreter.run(new float[1], new float[1]); + } + } +} diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java index a98fca0132..f8b73c7cf3 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -43,6 +43,9 @@ public final class InterpreterTest { private static final File MOBILENET_MODEL_FILE = new File("tensorflow/contrib/lite/java/src/testdata/mobilenet.tflite.bin"); + private static final File FLEX_MODEL_FILE = + new File("tensorflow/contrib/lite/testdata/multi_add_flex.bin"); + @Test public void testInterpreter() throws Exception { Interpreter interpreter = new Interpreter(MODEL_FILE); @@ -345,4 
+348,15 @@ public final class InterpreterTest { interpreter.close(); interpreter.close(); } + + /** Smoke test validating that flex model loading fails when the flex delegate is not linked. */ + @Test + public void testFlexModel() throws Exception { + try { + new Interpreter(FLEX_MODEL_FILE); + fail(); + } catch (IllegalStateException e) { + // Expected failure. + } + } } -- GitLab From 89c887558d8b0067213c39a79d5d048d3422b6dd Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 5 Oct 2018 16:02:49 -0700 Subject: [PATCH 033/411] [TF:XLA] Bump open source abseil revision to e821380d69a549dc64900693942789d21aa4df5e PiperOrigin-RevId: 215981413 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b9ced1bd6c..6f5aa85b01 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -112,11 +112,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_google_absl", build_file = clean_dep("//third_party:com_google_absl.BUILD"), - sha256 = "507903ef9353cb25cccd0a6840048fdd348fd20e98314d694f04a990c0f277e3", - strip_prefix = "abseil-cpp-f21d187b80e3b7f08fb279775ea9c8b48c636030", + sha256 = "f186bf5d9fce3037c602a21f86facbdd317adecef36e1726ec7bc7b496943a82", + strip_prefix = "abseil-cpp-e821380d69a549dc64900693942789d21aa4df5e", urls = [ - "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz", - "https://github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz", + "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz", + "https://github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz", ], ) -- GitLab From 1daaf0fabee1c59af00e14f358d08ac9f5390b9f Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 5 Oct 2018 16:32:30 -0700 Subject: [PATCH 034/411] 
Orders non-resource-affecting stateful ops in defuns. PiperOrigin-RevId: 215985679 --- tensorflow/python/eager/function.py | 7 +++++++ tensorflow/python/kernel_tests/logging_ops_test.py | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 2750461fb2..f06148b5d2 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -1906,8 +1906,10 @@ class AutomaticControlDependencies(object): last_op_using_resource_tensor[inp] = op ops_which_must_run = set([op]) continue + found_resource = False for inp in op.inputs: if inp.dtype == dtypes_module.resource: + found_resource = True # Deal with switches, finally. if inp.op.type == "Switch": self._process_switch(inp.op, ops_which_must_run, @@ -1922,6 +1924,11 @@ class AutomaticControlDependencies(object): if inp in merge_for_resource: merge_for_resource[inp]._add_control_input(op) # pylint: disable=protected-access last_op_using_resource_tensor[inp] = op + if (op.op_def.is_stateful and not found_resource + and op._control_flow_context is None): # pylint: disable=protected-access + if None in last_op_using_resource_tensor: + op._add_control_input(last_op_using_resource_tensor[None]) # pylint: disable=protected-access + last_op_using_resource_tensor[None] = op control_inputs = [c for c in control_inputs if c._control_flow_context is op._control_flow_context] # pylint: disable=protected-access op._add_control_inputs(control_inputs) # pylint: disable=protected-access diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py index 4beddd00bb..2f19ecc0e6 100644 --- a/tensorflow/python/kernel_tests/logging_ops_test.py +++ b/tensorflow/python/kernel_tests/logging_ops_test.py @@ -306,6 +306,19 @@ class PrintV2Test(test.TestCase): logging_ops.print_v2(tensor) self.assertTrue((expected + "\n") in printed.contents()) + def testPrintsOrderedInDefun(self): + 
with context.eager_mode(): + + @function.defun + def prints(): + logging_ops.print_v2("A") + logging_ops.print_v2("B") + logging_ops.print_v2("C") + + with self.captureWritesToStream(sys.stderr) as printed: + prints() + self.assertTrue(("A\nB\nC\n") in printed.contents()) + @test_util.run_in_graph_and_eager_modes() def testPrintInDefunWithoutExplicitEvalOfPrint(self): @function.defun -- GitLab From 29af23aeadd1d6fccbfa4223b58dad8f5b8df4f8 Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 5 Oct 2018 16:47:07 -0700 Subject: [PATCH 035/411] Fix api_compatibility_test diff for large files. assertEqual might be applied instead of assertMultiLineEqual if input is too large (https://bugs.python.org/issue11763). This change is switching to use unified_diff in that case. PiperOrigin-RevId: 215987656 --- tensorflow/python/util/protobuf/compare.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/util/protobuf/compare.py b/tensorflow/python/util/protobuf/compare.py index a0e6bf65cf..3a3af4bffa 100644 --- a/tensorflow/python/util/protobuf/compare.py +++ b/tensorflow/python/util/protobuf/compare.py @@ -63,6 +63,7 @@ from __future__ import division from __future__ import print_function import collections +import difflib import six @@ -101,10 +102,19 @@ def assertProtoEqual(self, a, b, check_initialized=True, # pylint: disable=inva if normalize_numbers: NormalizeNumberFields(pb) - self.assertMultiLineEqual( - text_format.MessageToString(a, descriptor_pool=pool), - text_format.MessageToString(b, descriptor_pool=pool), - msg=msg) + a_str = text_format.MessageToString(a, descriptor_pool=pool) + b_str = text_format.MessageToString(b, descriptor_pool=pool) + + # Some Python versions would perform regular diff instead of multi-line + # diff if string is longer than 2**16. We substitute this behavior + # with a call to unified_diff instead to have easier-to-read diffs. + # For context, see: https://bugs.python.org/issue11763. 
+ if len(a_str) < 2**16 and len(b_str) < 2**16: + self.assertMultiLineEqual(a_str, b_str, msg=msg) + else: + diff = '\n' + ''.join(difflib.unified_diff(a_str.splitlines(True), + b_str.splitlines(True))) + self.fail('%s : %s' % (msg, diff)) def NormalizeNumberFields(pb): -- GitLab From 55081a9d21ab42834ac4fb70351e3d2ee13ef78b Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 5 Oct 2018 16:47:51 -0700 Subject: [PATCH 036/411] [XLA:GPU] Use a struct for the return value of CudnnConvolutionAlgorithmPicker::PickBestAlgorithm. Using a struct lets us return additional data -- namely, the elapsed time to run the best algo -- without adding a fourth entry to the tuple, which would be confusing. No functional change. PiperOrigin-RevId: 215987795 --- tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../gpu/cudnn_convolution_algorithm_picker.cc | 40 ++++++++----------- .../gpu/cudnn_convolution_algorithm_picker.h | 11 ++++- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 522e9f5948..7b84f691f6 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -404,6 +404,7 @@ cc_library( "//tensorflow/core:stream_executor_no_cuda", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", "@com_google_absl//absl/types:optional", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index 7125673887..590c0a7d54 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -145,7 +145,7 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) { // cache misses and doing extra work. 
Overall, caching doesn't seem worth the // trouble, but we may want to revisit this if we ever find a model where // caching would speed up compilation a lot. -StatusOr> +StatusOr CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( HloCustomCallInstruction* instr) { // TODO(timshen): for now only check fp16. It can be expanded to other types, @@ -316,9 +316,10 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( << AlgorithmToString(best_result.algorithm()) << ", takes " << best_result.elapsed_time_in_ms() << "ms, and uses " << best_result_bytes_used << "B of scratch memory."; - return std::make_tuple(best_result.algorithm().algo_id(), - best_result.algorithm().tensor_ops_enabled(), - best_result_bytes_used); + return AutotuneResult{best_result.algorithm().algo_id(), + best_result.algorithm().tensor_ops_enabled(), + best_result_bytes_used, + absl::Milliseconds(best_result.elapsed_time_in_ms())}; } return InternalError( @@ -331,37 +332,30 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( HloInstruction* instr) { CHECK(IsCustomCallToDnnConvolution(*instr)); - StatusOr> alg_scratch_and_tc = + StatusOr best_algo_or = PickBestAlgorithm(Cast(instr)); - - if (!alg_scratch_and_tc.ok()) { - LOG(ERROR) << alg_scratch_and_tc.status(); + if (!best_algo_or.ok()) { + LOG(ERROR) << best_algo_or.status(); return false; } - int64 algorithm; - bool tensor_ops_enabled; - int64 scratch_bytes; - - std::tie(algorithm, tensor_ops_enabled, scratch_bytes) = - alg_scratch_and_tc.ConsumeValueOrDie(); - - VLOG(1) << "Setting cudnn conv to use algorithm " << algorithm << " and " - << NumBytesToString(scratch_bytes) + auto best_algo = std::move(best_algo_or).ValueOrDie(); + VLOG(1) << "Setting cudnn conv to use algorithm " << best_algo.algorithm + << " and " << NumBytesToString(best_algo.scratch_bytes) << " of scratch memory: " << instr->ToString() - << " tensor_ops_enabled: " << tensor_ops_enabled; + << " tensor_ops_enabled: " << best_algo.tensor_ops_enabled; // Replace instr 
with a new CustomCall which has the correct algorithm, and // whose output shape has the appropriate amount of scratch memory. HloComputation* computation = instr->parent(); - Shape new_call_shape = - ShapeUtil::MakeTupleShape({instr->shape().tuple_shapes(0), - ShapeUtil::MakeShape(U8, {scratch_bytes})}); + Shape new_call_shape = ShapeUtil::MakeTupleShape( + {instr->shape().tuple_shapes(0), + ShapeUtil::MakeShape(U8, {best_algo.scratch_bytes})}); TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config, instr->backend_config()); - backend_config.set_algorithm(algorithm); - backend_config.set_tensor_ops_enabled(tensor_ops_enabled); + backend_config.set_algorithm(best_algo.algorithm); + backend_config.set_tensor_ops_enabled(best_algo.tensor_ops_enabled); HloInstruction* new_call = computation->AddInstruction( instr->CloneWithNewOperands(new_call_shape, instr->operands())); diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h index aeda2fc7f8..136c32210a 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h @@ -16,6 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ +#include "absl/time/time.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" @@ -47,10 +48,16 @@ class CudnnConvolutionAlgorithmPicker : public HloModulePass { StatusOr Run(HloModule* module) override; private: + struct AutotuneResult { + int64 algorithm; + bool tensor_ops_enabled; + int64 scratch_bytes; + absl::Duration runtime; + }; + StatusOr RunOnComputation(HloComputation* computation); StatusOr RunOnInstruction(HloInstruction* instr); - StatusOr> PickBestAlgorithm( - HloCustomCallInstruction* instr); + StatusOr PickBestAlgorithm(HloCustomCallInstruction* instr); se::StreamExecutor* stream_exec_; // never null DeviceMemoryAllocator* allocator_; // may be null -- GitLab From ab97f1323bd2a98d20ed82dc3ff8585481961f0d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 5 Oct 2018 16:59:39 -0700 Subject: [PATCH 037/411] Automated rollback of commit d258207f1583df4faa452265b051879af6c15dac PiperOrigin-RevId: 215989111 --- tensorflow/python/ops/array_ops.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 4be9c532f4..e3e4d5f910 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1407,8 +1407,13 @@ def transpose(a, perm=None, name="transpose", conjugate=False): gen_array_ops.conjugate_transpose if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose) if perm is None: - rank = gen_array_ops.rank(a) - perm = (rank - 1) - gen_math_ops._range(0, rank, 1) + a = ops.convert_to_tensor(a, name="a") + if not a.get_shape().ndims: + rank = gen_array_ops.rank(a) + perm = (rank - 1) - gen_math_ops._range(0, rank, 1) + else: + rank = a.get_shape().ndims + perm = (rank - 1) - np.arange(rank) ret = transpose_fn(a, perm, name=name) # NOTE(mrry): Setting the shape explicitly because # reverse is not handled by the shape function. 
-- GitLab From 15d399cd8590c18dc643d979883fe4201c8ea631 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Fri, 5 Oct 2018 17:01:01 -0700 Subject: [PATCH 038/411] [tf.data vectorization] Feed inputs to vectorizers with notion of stackedness PiperOrigin-RevId: 215989259 --- .../optimizers/data/vectorization/BUILD | 10 ++ .../data/vectorization/cast_vectorizer.cc | 16 +-- .../data/vectorization/unpack_vectorizer.cc | 16 +-- .../data/vectorization/vectorizer.h | 19 ++- .../data/vectorization/vectorizer_registry.cc | 2 - .../data/vectorization/vectorizer_registry.h | 15 +-- .../vectorization/vectorizer_registry_test.cc | 11 +- .../data/vectorization/wrapped_tensor.h | 44 +++++++ .../optimizers/data/vectorization_utils.cc | 116 +++++++++--------- 9 files changed, 144 insertions(+), 105 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD index 37aa24b947..985d6c6c3a 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD +++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD @@ -12,10 +12,20 @@ VECTORIZER_DEPS = [ "//tensorflow/core/grappler/optimizers/data:graph_utils", ] + tf_protos_all() +cc_library( + name = "wrapped_tensor", + hdrs = ["wrapped_tensor.h"], + deps = [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:lib", + ], +) + cc_library( name = "vectorizer", hdrs = ["vectorizer.h"], deps = [ + ":wrapped_tensor", "//tensorflow/core:core_cpu", "//tensorflow/core:lib", ] + tf_protos_all(), diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc index 3af6bab409..f445157531 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc @@ 
-19,13 +19,13 @@ limitations under the License. namespace tensorflow { namespace grappler { -namespace vectorization_utils { +namespace { class CastVectorizer : public Vectorizer { public: Status Vectorize(const Node& node, Graph* outer_scope, - std::vector* input_ports, - std::vector* output_ports) override { + std::vector&& inputs, + std::vector* outputs) override { Status s; if (node.num_inputs() != 1) { return errors::Internal("Cast op should only have one input."); @@ -35,15 +35,17 @@ class CastVectorizer : public Vectorizer { auto new_cast_node = outer_scope->AddNode(node.def(), &s); TF_RETURN_IF_ERROR(s); - // Add input and output mappings - input_ports->push_back({new_cast_node, 0}); - output_ports->push_back({new_cast_node, 0}); + outer_scope->AddEdge(inputs[0].node, inputs[0].output_index, new_cast_node, + 0); + + // Add output mappings + outputs->push_back({new_cast_node, 0, true}); return Status::OK(); } }; REGISTER_VECTORIZER("Cast", CastVectorizer); -} // namespace vectorization_utils +} // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc index 74ce520ce1..f1ba741821 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc @@ -19,15 +19,15 @@ limitations under the License. 
namespace tensorflow { namespace grappler { -namespace vectorization_utils { +namespace { class UnpackVectorizer : public Vectorizer { public: Status Vectorize(const Node& node, Graph* outer_scope, - std::vector* input_ports, - std::vector* output_ports) override { + std::vector&& inputs, + std::vector* outputs) override { Status s; - if (node.num_inputs() != 1) { + if (node.num_inputs() != 1 || inputs.size() != 1) { return errors::Internal("Unpack op should only have one input."); } @@ -39,13 +39,13 @@ class UnpackVectorizer : public Vectorizer { int new_axis = node.def().attr().at("axis").i() + 1; new_unpack_node->AddAttr("axis", new_axis); - // Add the input mappings - input_ports->push_back({new_unpack_node, 0}); + outer_scope->AddEdge(inputs[0].node, inputs[0].output_index, + new_unpack_node, 0); // Add the output mappings int num = node.def().attr().at("num").i(); for (int i = 0; i < num; ++i) { - output_ports->push_back({new_unpack_node, i}); + outputs->push_back({new_unpack_node, i, true}); } return Status::OK(); @@ -54,6 +54,6 @@ class UnpackVectorizer : public Vectorizer { REGISTER_VECTORIZER("Unpack", UnpackVectorizer); -} // namespace vectorization_utils +} // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h index 56eb88c95e..8d4676aae0 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h +++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h @@ -18,15 +18,12 @@ limitations under the License. 
#include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" namespace tensorflow { namespace grappler { -namespace vectorization_utils { - -// Describes a tensor with its operation Node and output position -typedef std::pair Port; // Interface for vectorization of TensorFlow operations. See `CastVectorizer` // for an example. @@ -36,17 +33,17 @@ class Vectorizer { // Vectorizes an operation, `node`, by adding Node(s) to `outer_scope` // that produce the same vector output(s) as executing `node`'s op - // on elements of the vector inputs. The new Node(s) collectively have the + // on elements of `inputs`. The new Node(s) collectively have the // same number of input and output ports as the node being converted. - // Adds mappings for the new nodes' input and output ports to `inputs` and - // `outputs` respectively, where the i'th Port in inputs/outputs - // corresponds to the i'th input/output port of the node to be converted. + // Adds edges between the newly created nodes and nodes in `inputs`, and adds + // mappings to the new nodes' output ports to `outputs`, where the i'th + // value in `outputs` corresponds to the i'th output port of the node + // to be converted. 
virtual Status Vectorize(const Node& node, Graph* outer_scope, - std::vector* input_ports, - std::vector* output_ports) = 0; + std::vector&& inputs, + std::vector* outputs) = 0; }; -} // namespace vectorization_utils } // namespace grappler } // namespace tensorflow #endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_H_ diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc index a6551e36ac..e1cf77a7d5 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc @@ -19,7 +19,6 @@ limitations under the License. namespace tensorflow { namespace grappler { -namespace vectorization_utils { VectorizerRegistry* VectorizerRegistry::Global() { static VectorizerRegistry* registry = new VectorizerRegistry; @@ -42,6 +41,5 @@ void VectorizerRegistry::Register(const string& op_type, vectorizers_.insert(std::pair>( op_type, std::move(vectorizer))); } -} // namespace vectorization_utils } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h index 16159d47ca..ad54c74933 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h +++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h @@ -23,7 +23,6 @@ limitations under the License. namespace tensorflow { namespace grappler { -namespace vectorization_utils { // A global VectorizerRegistry is used to hold all the vectorizers. 
class VectorizerRegistry { @@ -59,16 +58,12 @@ class VectorizerRegistration { #define REGISTER_VECTORIZER_UNIQ_HELPER(ctr, op_type, vectorizer) \ REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer) -#define REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer) \ - static ::tensorflow::grappler::vectorization_utils:: \ - vectorizer_registration::VectorizerRegistration \ - vectorizer_registration_##ctr( \ - op_type, \ - ::std::unique_ptr< \ - ::tensorflow::grappler::vectorization_utils::Vectorizer>( \ - new vectorizer())) +#define REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer) \ + static ::tensorflow::grappler::vectorizer_registration:: \ + VectorizerRegistration vectorizer_registration_##ctr( \ + op_type, ::std::unique_ptr<::tensorflow::grappler::Vectorizer>( \ + new vectorizer())) -} // namespace vectorization_utils } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc index 663ceba027..054aeb9a8f 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc @@ -20,13 +20,12 @@ limitations under the License. 
namespace tensorflow { namespace grappler { -namespace vectorization_utils { class TestVectorizer : public Vectorizer { public: Status Vectorize(const Node& node, Graph* outer_scope, - std::vector* inputs, - std::vector* outputs) override { + std::vector&& inputs, + std::vector* outputs) override { return Status::OK(); } }; @@ -43,10 +42,10 @@ TEST(TestVectorizer, TestTestVectorizer) { NodeDef node_def; Status s; Node* node = g.AddNode(node_def, &s); - std::vector inputs, outputs; - EXPECT_TRUE(vectorizer->Vectorize(*node, &g, &inputs, &outputs).ok()); + std::vector inputs, outputs; + EXPECT_TRUE( + vectorizer->Vectorize(*node, &g, std::move(inputs), &outputs).ok()); } -} // namespace vectorization_utils } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h b/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h new file mode 100644 index 0000000000..4439b4ab4e --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h @@ -0,0 +1,44 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_ + +#include "tensorflow/core/graph/graph.h" + +namespace tensorflow { +namespace grappler { + +// Represents a tensor that has been vectorized. +struct WrappedTensor { + Node* const node; + const int output_index; + + // Whether the tensor is stacked, i.e. represents the results of applying + // the operation on all slices of the input, where each row i of the + // tensor corresponds to the op's output on slice i of the input. False + // if the tensor is not stacked, i.e. represents the result of the op on + // a single slice of the input, where the result does not vary between + // slices. + bool stacked; + + WrappedTensor(Node* node, int output_index, bool stacked) + : node(node), output_index(output_index), stacked(stacked) {} +}; + +} // namespace grappler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_ diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc index 344c420902..ba857ab5d9 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc @@ -45,22 +45,6 @@ namespace { // Describes a tensor with its operation Node and output position typedef std::pair TensorDesc; -// Equivalent to python Pfor's WrappedTensor struct -struct WrappedTensor { - TensorDesc tensor; - - // Whether the tensor is stacked, i.e. represents the results of applying - // the operation on all slices of the input, where each row i of the - // tensor corresponds to the op's output on slice i of the input. False - // if the tensor is not stacked, i.e. 
represents the result of the op on - // a single slice of the input, where the result does not vary between - // slices. - bool stacked; - - WrappedTensor(TensorDesc&& tensor, bool stacked) - : tensor(std::move(tensor)), stacked(stacked) {} -}; - const char* const kRetValOp = "_Retval"; void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src, @@ -239,34 +223,48 @@ Status Vectorization::AddConversionMapping(Node* op_node) { return errors::Unimplemented("No vectorizer registered for op: ", op_node->type_string()); } - std::vector input_ports, output_ports; - input_ports.reserve(op_node->num_inputs()); - output_ports.reserve(op_node->num_outputs()); - TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(), - &input_ports, &output_ports)); + std::vector inputs, outputs; + inputs.reserve(op_node->num_inputs()); + outputs.reserve(op_node->num_outputs()); std::vector input_edges; TF_RETURN_IF_ERROR(op_node->input_edges(&input_edges)); - if (op_node->num_outputs() != output_ports.size() || - op_node->num_inputs() != input_ports.size() || - input_edges.size() != input_ports.size()) { - return errors::Internal("Vectorizer inputs/outputs don't match."); - } - - // Promote the inputs of the op to MapDefun outputs and connect the edges - // accordingly. + // The inputs for the node to be converted may already have been converted + // themselves. For those that are not, we promote them to MapDefun outputs. for (size_t i = 0; i < op_node->num_inputs(); ++i) { auto edge = input_edges[i]; - TF_RETURN_IF_ERROR(AddMapDefunOutput(map_defun_fn_.get(), map_defun_node_, - {edge->src(), edge->src_output()})); - outer_scope_->AddEdge(map_defun_node_, map_defun_fn_->ret_nodes.size() - 1, - input_ports[i].first, input_ports[i].second); + if (auto found = gtl::FindOrNull(conversion_map_, + {edge->src(), edge->src_output()})) { + inputs.push_back(*found); + } else { + // TODO(rachelim): Handle the case where unconverted inputs are unstacked. 
+ // We assume that all unconverted inputs will be stacked, since we + // converted all unstacked nodes in `Initialize`. However, it's actually + // possible that yet-unconverted nodes may produce unstacked outputs after + // they are vectorized. (For example, see the "Shape" converter in + // tensorflow/python/ops/parallel_for/pfor.py). If a vectorizer expects + // an unstacked input but receives a stacked one, vectorizer->Vectorize + // will return an error. + TF_RETURN_IF_ERROR(AddMapDefunOutput(map_defun_fn_.get(), map_defun_node_, + {edge->src(), edge->src_output()})); + int output_index = map_defun_fn_->ret_nodes.size() - 1; + inputs.push_back({map_defun_node_, output_index, true}); + } + } + + TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(), + std::move(inputs), &outputs)); + + if (op_node->num_outputs() != outputs.size()) { + return errors::Internal( + "Number of vectorizer outputs does not match. Expected: ", + op_node->num_outputs(), " Actual: ", outputs.size()); } // Add output mappings. for (size_t i = 0; i < op_node->num_outputs(); ++i) { - conversion_map_.insert({{op_node, i}, {std::move(output_ports[i]), true}}); + conversion_map_.insert({{op_node, i}, outputs[i]}); } return Status::OK(); @@ -281,25 +279,22 @@ Status Vectorization::ConvertOutput(int output_position) { TensorDesc output({ret_edge->src(), ret_edge->src_output()}); TensorDesc converted_output; - if (auto found = gtl::FindOrNull(conversion_map_, output)) { - // It's possible the output already has a mapping, if it comes from a node - // that has already been converted. - if (found->stacked) { - converted_output = found->tensor; - } else { - // Some outputs may be unstacked if they don't derive from arg nodes - // (for example, if a function returns a constant). For these, we - // have to add extra nodes to tile it in the 0th dimension. 
- TF_RETURN_IF_ERROR(StackTensor(found, &converted_output)); - } - } else { - // Note: All unstacked nodes are converted ahead of time in `Initialize`, - // and here we assume that all op vectorizers create only stacked outputs. - // This may not hold in the future, as more vectorizers are added that - // may actually create unstacked outputs. For example, see the `Shape` - // converter in third_party/tensorflow/python/ops/parallel_for/pfor.py + + // It's possible the output already has a mapping, if it comes from a node + // that has already been converted. + auto found = gtl::FindOrNull(conversion_map_, output); + if (!found) { TF_RETURN_IF_ERROR(AddConversionMapping(output.first)); - converted_output = conversion_map_.at(output).tensor; + found = &conversion_map_.at(output); + } + + if (found->stacked) { + converted_output = {found->node, found->output_index}; + } else { + // Some outputs may be unstacked if they don't derive from arg nodes + // (for example, if a function returns a constant). For these, we + // have to add extra nodes to tile it in the 0th dimension. 
+ TF_RETURN_IF_ERROR(StackTensor(found, &converted_output)); } ReplaceEdgeSources({map_defun_node_, output_position}, converted_output, @@ -455,7 +450,7 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked, Node* ones_shape; TF_RETURN_IF_ERROR(node_builder("Shape") - .Input(unstacked->tensor.first) // input + .Input(unstacked->node) // input .Finalize(g, &ones_shape)); Node* ones; @@ -473,8 +468,8 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked, Node* expand_dims; TF_RETURN_IF_ERROR(node_builder("ExpandDims") - .Input(unstacked->tensor.first) // input - .Input(const_0) // dim + .Input(unstacked->node) // input + .Input(const_0) // dim .Finalize(g, &expand_dims)); TF_RETURN_IF_ERROR(node_builder("Tile") @@ -491,11 +486,11 @@ Status Vectorization::AddArgNodeMappings() { TF_RETURN_IF_ERROR(map_defun_node_->input_node( arg_node->attrs().Find("index")->i(), &input_node)); - conversion_map_.insert({{arg_node, 0}, {{input_node, 0}, true}}); + conversion_map_.insert({{arg_node, 0}, {input_node, 0, true}}); // Control inputs conversion_map_.insert({{arg_node, Graph::kControlSlot}, - {{input_node, Graph::kControlSlot}, true}}); + {input_node, Graph::kControlSlot, true}}); } return Status::OK(); } @@ -541,7 +536,7 @@ bool Vectorization::AddUnstackedNodeMappingsHelper(TensorDesc&& tensor, if (auto found = gtl::FindOrNull(conversion_map_, {edge->src(), edge->src_output()})) { - outer_scope_->AddEdge(found->tensor.first, found->tensor.second, node, + outer_scope_->AddEdge(found->node, found->output_index, node, edge->dst_input()); } else { status->Update(errors::Internal( @@ -552,11 +547,10 @@ bool Vectorization::AddUnstackedNodeMappingsHelper(TensorDesc&& tensor, // Add output mappings for (int i = 0; i < tensor.first->num_outputs(); ++i) { - conversion_map_.insert( - {{tensor.first, i}, WrappedTensor({node, i}, false)}); + conversion_map_.insert({{tensor.first, i}, WrappedTensor(node, i, false)}); } conversion_map_.insert({{tensor.first, 
Graph::kControlSlot}, - WrappedTensor({node, Graph::kControlSlot}, false)}); + WrappedTensor(node, Graph::kControlSlot, false)}); return true; } -- GitLab From 4831740f90eaf266a99d3ffa7d390d54325b689f Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 5 Oct 2018 17:05:17 -0700 Subject: [PATCH 039/411] [XLA:GPU] Remove hidden flag for disabling heuristic layout assignment. Heuristic NCHW/NHWC layout assignment works great; we've never had to flip this flag. Might as well remove it and simplify things a bit. PiperOrigin-RevId: 215989807 --- tensorflow/compiler/xla/service/gpu/BUILD | 11 ------- .../xla/service/gpu/gpu_layout_assignment.cc | 11 ++----- .../compiler/xla/service/gpu/gpu_options.cc | 28 ---------------- .../compiler/xla/service/gpu/gpu_options.h | 33 ------------------- 4 files changed, 2 insertions(+), 81 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/gpu/gpu_options.cc delete mode 100644 tensorflow/compiler/xla/service/gpu/gpu_options.h diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 7b84f691f6..350fd32537 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -781,7 +781,6 @@ cc_library( srcs = ["gpu_layout_assignment.cc"], hdrs = ["gpu_layout_assignment.h"], deps = [ - ":gpu_options", ":ir_emission_utils", ":stream_executor_util", "//tensorflow/compiler/xla:shape_util", @@ -882,16 +881,6 @@ cc_library( ], ) -cc_library( - name = "gpu_options", - srcs = ["gpu_options.cc"], - hdrs = ["gpu_options.h"], - deps = [ - "//tensorflow/compiler/xla/service:hlo_module_config", - "//tensorflow/core:lib_internal", - ], -) - cc_library( name = "stream_executor_util", srcs = ["stream_executor_util.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc index 74352f26aa..1ffe855750 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc 
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/compiler/xla/layout_util.h" -#include "tensorflow/compiler/xla/service/gpu/gpu_options.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" @@ -125,14 +124,8 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall( DataLayout input; FilterLayout filter; DataLayout output; - if (ConvUseLayoutHeuristic(instr->GetModule()->config())) { - std::tie(input, filter, output) = - HeuristicLayoutAssignment(instr, stream_executor_); - } else { - input = DataLayout::kBatchDepthYX; - filter = FilterLayout::kOutputInputYX; - output = DataLayout::kBatchDepthYX; - } + std::tie(input, filter, output) = + HeuristicLayoutAssignment(instr, stream_executor_); TF_ASSIGN_OR_RETURN( std::tie(*input_shape->mutable_layout(), diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.cc b/tensorflow/compiler/xla/service/gpu/gpu_options.cc deleted file mode 100644 index 35b4b4e20b..0000000000 --- a/tensorflow/compiler/xla/service/gpu/gpu_options.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/compiler/xla/service/gpu/gpu_options.h" -#include "tensorflow/core/lib/gtl/map_util.h" - -namespace xla { -namespace gpu { - -bool ConvUseLayoutHeuristic(const HloModuleConfig& config) { - return !config.debug_options().xla_backend_extra_options().count( - "xla_gpu_experimental_conv_disable_layout_heuristic"); -} - -} // namespace gpu -} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.h b/tensorflow/compiler/xla/service/gpu/gpu_options.h deleted file mode 100644 index 498d4a9495..0000000000 --- a/tensorflow/compiler/xla/service/gpu/gpu_options.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ - -#include "tensorflow/compiler/xla/service/hlo_module_config.h" - -// Helper functions for querying options that are specific to the GPU backend. - -namespace xla { -namespace gpu { - -// Returns true if we should use heuristics to assign convolution layouts, as -// opposed to always assigning NCHW. 
-bool ConvUseLayoutHeuristic(const HloModuleConfig& config); - -} // namespace gpu -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ -- GitLab From 213d76a6ed77a696883502c53a3a4f81d2ee4042 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Fri, 5 Oct 2018 17:34:30 -0700 Subject: [PATCH 040/411] Simply the logic for bubbling captured tensors when building cond_v2 grad. The current logic tries to bubble the forward pass tensor to the outermost graph. That might not always be do-able e.g. when the cond is inside a while loop it will need to know accumulator logic for while_loop. So instead, the cond_grad now captures tensors from the forward If op's graph. When the grad If op is built these tensors will be appropriately captured by the surrounding FuncGraph. PiperOrigin-RevId: 215993009 --- .../kernel_tests/control_flow_ops_py_test.py | 6 +-- tensorflow/python/ops/cond_v2_impl.py | 48 ++++++++----------- 2 files changed, 22 insertions(+), 32 deletions(-) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 7fae5249aa..baea5c0f6d 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -661,8 +661,7 @@ class ControlFlowTest(test.TestCase): sess.run(r) def testCondGrad_1(self): - graph = ops.Graph() - with graph.as_default(): + with self.cached_session(): x = constant_op.constant(10.0, name="x") pred = math_ops.less(1, 2) fn1 = lambda: array_ops.identity(x) @@ -670,8 +669,7 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.cond(pred, fn1, fn2) grad = gradients_impl.gradients(r, [x])[0] - with self.cached_session(): - self.assertAllEqual(1.0, grad.eval()) + self.assertAllEqual(1.0, grad.eval()) def testCondGrad_2(self): with self.cached_session(): diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py index 
195ad11c71..c9aa4d4889 100644 --- a/tensorflow/python/ops/cond_v2_impl.py +++ b/tensorflow/python/ops/cond_v2_impl.py @@ -282,9 +282,10 @@ def _resolve_grad_inputs(cond_graph, grad_graph): as is. 2. Tensors in the forward pass graph. These tensors may not be "live" when the gradient is being computed. We replace such references by their - corresponding tensor in the least common ancestor graph of `grad_graph` and - `cond_graph`. Since we export intermediate tensors for all branch - functions, this is always possible. + corresponding tensor in `cond_graph.outer_graph`. In the case of nested + control flow or functions, the gradient logic handling + `grad_graph.outer_graph` will make sure the tensor from + `cond_graph.outer_graph` is also correctly captured. Args: cond_graph: function.FuncGraph. The forward-pass function. @@ -296,24 +297,23 @@ def _resolve_grad_inputs(cond_graph, grad_graph): new_inputs = [] for t in grad_graph.external_captures: + # `t` must either be in `grad_graph.outer_graph` or in the forward + # `cond_graph`. if t.graph != grad_graph.outer_graph: - # `t` is a tensor in `cond_graph` or one of its ancestors. We bubble this - # tensor to the least common ancestor of the `cond_graph` and - # `grad_graph` so that it is "in-scope" for `grad_graph`. - # TODO(srbs): `_is_ancestor` calls may be expensive. Compute the least - # common ancestor once and re-use. - assert _is_ancestor(cond_graph, t.graph) - while not _is_ancestor(grad_graph, t.graph): - assert isinstance(t.graph, _function.FuncGraph) - if t in t.graph.internal_captures: - # TODO(srbs): Consider building a map of internal_captures -> - # external_captures instead of searching for `t` twice. - t = t.graph.external_captures[t.graph.internal_captures.index(t)] - else: - # Note: All intermediate tensors are output by the If op. - # TODO(srbs): .index() calls may be expensive. Optimize. 
- t = t.graph._if.outputs[t.graph.outputs.index(t)] - assert _is_ancestor(grad_graph, t.graph) + assert t.graph == cond_graph + # `internal_captures` are not treated as intermediates and hence not added + # to If op outputs. So we get the outer tensor corresponding to those + # from the list of `external_captures`. + try: + t = t.graph._if.outputs[t.graph.outputs.index(t)] + except ValueError: + index = t.graph.internal_captures.index(t) + t = t.graph.external_captures[index] + + # Note: We rely on the capturing logic of the gradient If op graph to + # correctly capture the tensors in `cond_graph.outer_graph`. Both cond_v2 + # and while_v2 handle this while building their gradient functions. + assert t.graph == cond_graph.outer_graph new_inputs.append(t) return new_inputs @@ -492,11 +492,3 @@ def _get_output_shapes(true_graph_outputs, false_graph_outputs): for t_out, f_out in zip(true_graph_outputs, false_graph_outputs) ] return output_shapes - - -def _is_ancestor(graph, maybe_ancestor): - if maybe_ancestor == graph: - return True - if isinstance(graph, _function.FuncGraph): - return _is_ancestor(graph.outer_graph, maybe_ancestor) - return False -- GitLab From 1484bad99cfd46cb63a839643cfce917b6f0cdd8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 5 Oct 2018 19:18:32 -0700 Subject: [PATCH 041/411] Update ops-related pbtxt files. 
PiperOrigin-RevId: 216000752 --- .../core/ops/compat/ops_history.v1.pbtxt | 224 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 59 +++++ 2 files changed, 283 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 780c6f6448..0753316724 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -70896,6 +70896,62 @@ op { } } } +op { + name: "StatelessRandomNormal" + input_arg { + name: "shape" + type_attr: "T" + } + input_arg { + name: "seed" + type_attr: "Tseed" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tseed" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "StatelessRandomUniform" input_arg { @@ -70993,6 +71049,118 @@ op { } } } +op { + name: "StatelessRandomUniform" + input_arg { + name: "shape" + type_attr: "T" + } + input_arg { + name: "seed" + type_attr: "Tseed" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tseed" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "StatelessRandomUniformInt" + input_arg { + 
name: "shape" + type_attr: "T" + } + input_arg { + name: "seed" + type_attr: "Tseed" + } + input_arg { + name: "minval" + type_attr: "dtype" + } + input_arg { + name: "maxval" + type_attr: "dtype" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tseed" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "StatelessTruncatedNormal" input_arg { @@ -71090,6 +71258,62 @@ op { } } } +op { + name: "StatelessTruncatedNormal" + input_arg { + name: "shape" + type_attr: "T" + } + input_arg { + name: "seed" + type_attr: "Tseed" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tseed" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "StatelessWhile" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 0d8997c1bd..14cc9df9a2 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -32978,6 +32978,7 @@ op { allowed_values { list { type: DT_HALF + type: DT_BFLOAT16 type: DT_FLOAT type: DT_DOUBLE } @@ -33033,6 +33034,7 @@ op { allowed_values { list { type: DT_HALF + type: DT_BFLOAT16 type: DT_FLOAT type: DT_DOUBLE } @@ -33065,6 +33067,62 @@ op { } } } +op { + name: "StatelessRandomUniformInt" + input_arg { + name: "shape" + 
type_attr: "T" + } + input_arg { + name: "seed" + type_attr: "Tseed" + } + input_arg { + name: "minval" + type_attr: "dtype" + } + input_arg { + name: "maxval" + type_attr: "dtype" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tseed" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "StatelessTruncatedNormal" input_arg { @@ -33088,6 +33146,7 @@ op { allowed_values { list { type: DT_HALF + type: DT_BFLOAT16 type: DT_FLOAT type: DT_DOUBLE } -- GitLab From 45f594a0bce42787356700c0e20f5fbc47193fa3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 5 Oct 2018 19:45:59 -0700 Subject: [PATCH 042/411] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 216001984 --- tensorflow/go/op/wrappers.go | 712 +++++++++++++++++------------------ 1 file changed, 356 insertions(+), 356 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index a7bbb80c82..5d17605e37 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -9640,36 +9640,6 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ... return op.Output(0) } -// Returns the element-wise sum of a list of tensors. -// -// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not -// wait for all of its inputs to be ready before beginning to sum. This can -// save memory if inputs are ready at different times, since minimum temporary -// storage is proportional to the output size rather than the inputs size. -// -// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. 
-// -// Returns a `Tensor` of same shape and type as the elements of `inputs`. -// -// Arguments: -// inputs: A list of `Tensor` objects, each with same shape and type. -// shape: Shape of elements of `inputs`. -func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "AccumulateNV2", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // RandomShuffleAttr is an optional argument to RandomShuffle. type RandomShuffleAttr func(optionalAttr) @@ -10383,206 +10353,65 @@ func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf. return scope.AddOperation(opspec) } -// Encode audio data using the WAV file format. -// -// This operation will generate a string suitable to be saved out to create a .wav -// audio file. It will be encoded in the 16-bit PCM format. It takes in float -// values in the range -1.0f to 1.0f, and any outside that value will be clamped to -// that range. -// -// `audio` is a 2-D float Tensor of shape `[length, channels]`. -// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100). -// -// Arguments: -// audio: 2-D with shape `[length, channels]`. -// sample_rate: Scalar containing the sample frequency. -// -// Returns 0-D. WAV-encoded file contents. -func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "EncodeWav", - Input: []tf.Input{ - audio, sample_rate, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes atan of x element-wise. 
-func Atan(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Atan", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax. -type ResourceApplyAdaMaxAttr func(optionalAttr) - -// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, m, and v tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the AdaMax algorithm. +// Locks a mutex resource. The output is the lock. So long as the lock tensor // -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// v_t <- max(beta2 * v_{t-1}, abs(g)) -// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) +// is alive, any other request to use `MutexLock` with this mutex will wait. // -// Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// v: Should be from a Variable(). -// beta1_power: Must be a scalar. -// lr: Scaling factor. Must be a scalar. -// beta1: Momentum factor. Must be a scalar. -// beta2: Momentum factor. Must be a scalar. -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. +// This is particularly useful for creating a critical section when used in +// conjunction with `MutexLockIdentity`: // -// Returns the created operation. 
-func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdaMax", - Input: []tf.Input{ - var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// AssertAttr is an optional argument to Assert. -type AssertAttr func(optionalAttr) - -// AssertSummarize sets the optional summarize attribute to value. +// ```python // -// value: Print this many entries of each tensor. -// If not specified, defaults to 3 -func AssertSummarize(value int64) AssertAttr { - return func(m optionalAttr) { - m["summarize"] = value - } -} - -// Asserts that the given condition is true. +// mutex = mutex_v2( +// shared_name=handle_name, container=container, name=name) // -// If `condition` evaluates to false, print the list of tensors in `data`. -// `summarize` determines how many entries of the tensors to print. +// def execute_in_critical_section(fn, *args, **kwargs): +// lock = gen_resource_variable_ops.mutex_lock(mutex) // -// Arguments: -// condition: The condition to evaluate. -// data: The tensors to print out when condition is false. +// with ops.control_dependencies([lock]): +// r = fn(*args, **kwargs) // -// Returns the created operation. 
-func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Assert", - Input: []tf.Input{ - condition, tf.OutputList(data), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Split a `SparseTensor` into `num_split` tensors along one dimension. +// with ops.control_dependencies(nest.flatten(r)): +// with ops.colocate_with(mutex): +// ensure_lock_exists = mutex_lock_identity(lock) // -// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices -// `[0 : shape[split_dim] % num_split]` gets one extra dimension. -// For example, if `split_dim = 1` and `num_split = 2` and the input is +// # Make sure that if any element of r is accessed, all of +// # them are executed together. +// r = nest.map_structure(tf.identity, r) // -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] +// with ops.control_dependencies([ensure_lock_exists]): +// return nest.map_structure(tf.identity, r) +// ``` // -// Graphically the output tensors are: +// While `fn` is running in the critical section, no other functions which wish to +// use this critical section may run. // -// output_tensor[0] = shape = [2, 4] -// [ a ] -// [b c ] +// Often the use case is that two executions of the same graph, in parallel, +// wish to run `fn`; and we wish to ensure that only one of them executes +// at a time. This is especially important if `fn` modifies one or more +// variables at a time. // -// output_tensor[1] = shape = [2, 3] -// [ d e ] -// [ ] +// It is also useful if two separate functions must share a resource, but we +// wish to ensure the usage is exclusive. // // Arguments: -// split_dim: 0-D. The dimension along which to split. Must be in the range -// `[0, rank(shape))`. -// indices: 2-D tensor represents the indices of the sparse tensor. 
-// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. -// num_split: The number of ways to split. +// mutex: The mutex resource to lock. // -// Returns A list of 1-D tensors represents the values of the output sparse -// tensors.A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { +// Returns A tensor that keeps a shared pointer to a lock on the mutex; +// when the Tensor is destroyed, the use count on the shared pointer is decreased +// by 1. When it reaches 0, the lock is released. +func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_split": num_split} opspec := tf.OpSpec{ - Type: "SparseSplit", + Type: "MutexLock", Input: []tf.Input{ - split_dim, indices, values, shape, + mutex, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - return output_indices, output_values, output_shape + return op.Output(0) } // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. 
@@ -11611,89 +11440,321 @@ func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToN a(attrs) } opspec := tf.OpSpec{ - Type: "StringToNumber", + Type: "StringToNumber", + Input: []tf.Input{ + string_tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. +type ResourceApplyFtrlV2Attr func(optionalAttr) + +// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the Ftrl-proximal scheme. +// +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regulariation. Must be a scalar. +// l2: L2 shrinkage regulariation. Must be a scalar. +// +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. 
+func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyFtrlV2", + Input: []tf.Input{ + var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. +// +// This Op does not require `a_indices` be sorted in standard lexicographic order. +// +// Arguments: +// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. +// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. +// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. +// b: `ndims`-D Tensor. With shape `a_shape`. +func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseTensorDenseAdd", + Input: []tf.Input{ + a_indices, a_values, a_shape, b, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Encode audio data using the WAV file format. +// +// This operation will generate a string suitable to be saved out to create a .wav +// audio file. It will be encoded in the 16-bit PCM format. It takes in float +// values in the range -1.0f to 1.0f, and any outside that value will be clamped to +// that range. +// +// `audio` is a 2-D float Tensor of shape `[length, channels]`. +// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100). +// +// Arguments: +// audio: 2-D with shape `[length, channels]`. +// sample_rate: Scalar containing the sample frequency. 
+// +// Returns 0-D. WAV-encoded file contents. +func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "EncodeWav", + Input: []tf.Input{ + audio, sample_rate, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes atan of x element-wise. +func Atan(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Atan", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax. +type ResourceApplyAdaMaxAttr func(optionalAttr) + +// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var, m, and v tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the AdaMax algorithm. +// +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// v_t <- max(beta2 * v_{t-1}, abs(g)) +// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) +// +// Arguments: +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// v: Should be from a Variable(). +// beta1_power: Must be a scalar. +// lr: Scaling factor. Must be a scalar. +// beta1: Momentum factor. Must be a scalar. +// beta2: Momentum factor. Must be a scalar. +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. 
+func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyAdaMax", + Input: []tf.Input{ + var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// AssertAttr is an optional argument to Assert. +type AssertAttr func(optionalAttr) + +// AssertSummarize sets the optional summarize attribute to value. +// +// value: Print this many entries of each tensor. +// If not specified, defaults to 3 +func AssertSummarize(value int64) AssertAttr { + return func(m optionalAttr) { + m["summarize"] = value + } +} + +// Asserts that the given condition is true. +// +// If `condition` evaluates to false, print the list of tensors in `data`. +// `summarize` determines how many entries of the tensors to print. +// +// Arguments: +// condition: The condition to evaluate. +// data: The tensors to print out when condition is false. +// +// Returns the created operation. +func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Assert", Input: []tf.Input{ - string_tensor, + condition, tf.OutputList(data), }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. -type ResourceApplyFtrlV2Attr func(optionalAttr) - -// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. 
+// Split a `SparseTensor` into `num_split` tensors along one dimension. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the Ftrl-proximal scheme. +// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices +// `[0 : shape[split_dim] % num_split]` gets one extra dimension. +// For example, if `split_dim = 1` and `num_split = 2` and the input is // -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// input_tensor = shape = [2, 7] +// [ a d e ] +// [b c ] // -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. +// Graphically the output tensors are: // -// lr_power: Scaling factor. Must be a scalar. +// output_tensor[0] = shape = [2, 4] +// [ a ] +// [b c ] // -// Returns the created operation. -func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { +// output_tensor[1] = shape = [2, 3] +// [ d e ] +// [ ] +// +// Arguments: +// split_dim: 0-D. 
The dimension along which to split. Must be in the range +// `[0, rank(shape))`. +// indices: 2-D tensor represents the indices of the sparse tensor. +// values: 1-D tensor represents the values of the sparse tensor. +// shape: 1-D. tensor represents the shape of the sparse tensor. +// output indices: A list of 1-D tensors represents the indices of the output +// sparse tensors. +// num_split: The number of ways to split. +// +// Returns A list of 1-D tensors represents the values of the output sparse +// tensors.A list of 1-D tensors represents the shape of the output sparse +// tensors. +func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"num_split": num_split} opspec := tf.OpSpec{ - Type: "ResourceApplyFtrlV2", + Type: "SparseSplit", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + split_dim, indices, values, shape, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + return output_indices, output_values, output_shape } -// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. +// Returns the element-wise sum of a list of tensors. 
// -// This Op does not require `a_indices` be sorted in standard lexicographic order. +// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not +// wait for all of its inputs to be ready before beginning to sum. This can +// save memory if inputs are ready at different times, since minimum temporary +// storage is proportional to the output size rather than the inputs size. +// +// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. +// +// Returns a `Tensor` of same shape and type as the elements of `inputs`. // // Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. -// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. -// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. -// b: `ndims`-D Tensor. With shape `a_shape`. -func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { +// inputs: A list of `Tensor` objects, each with same shape and type. +// shape: Shape of elements of `inputs`. +func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"shape": shape} opspec := tf.OpSpec{ - Type: "SparseTensorDenseAdd", + Type: "AccumulateNV2", Input: []tf.Input{ - a_indices, a_values, a_shape, b, + tf.OutputList(inputs), }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -13925,67 +13986,6 @@ func CudnnRNNBackpropV2(scope *Scope, input tf.Output, input_h tf.Output, input_ return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// Locks a mutex resource. The output is the lock. So long as the lock tensor -// -// is alive, any other request to use `MutexLock` with this mutex will wait. 
-// -// This is particularly useful for creating a critical section when used in -// conjunction with `MutexLockIdentity`: -// -// ```python -// -// mutex = mutex_v2( -// shared_name=handle_name, container=container, name=name) -// -// def execute_in_critical_section(fn, *args, **kwargs): -// lock = gen_resource_variable_ops.mutex_lock(mutex) -// -// with ops.control_dependencies([lock]): -// r = fn(*args, **kwargs) -// -// with ops.control_dependencies(nest.flatten(r)): -// with ops.colocate_with(mutex): -// ensure_lock_exists = mutex_lock_identity(lock) -// -// # Make sure that if any element of r is accessed, all of -// # them are executed together. -// r = nest.map_structure(tf.identity, r) -// -// with ops.control_dependencies([ensure_lock_exists]): -// return nest.map_structure(tf.identity, r) -// ``` -// -// While `fn` is running in the critical section, no other functions which wish to -// use this critical section may run. -// -// Often the use case is that two executions of the same graph, in parallel, -// wish to run `fn`; and we wish to ensure that only one of them executes -// at a time. This is especially important if `fn` modifies one or more -// variables at a time. -// -// It is also useful if two separate functions must share a resource, but we -// wish to ensure the usage is exclusive. -// -// Arguments: -// mutex: The mutex resource to lock. -// -// Returns A tensor that keeps a shared pointer to a lock on the mutex; -// when the Tensor is destroyed, the use count on the shared pointer is decreased -// by 1. When it reaches 0, the lock is released. -func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MutexLock", - Input: []tf.Input{ - mutex, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // StringFormatAttr is an optional argument to StringFormat. 
type StringFormatAttr func(optionalAttr) @@ -16807,26 +16807,6 @@ func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values return op.Output(0), op.Output(1) } -// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). -// -// The Hurwitz zeta function is defined as: -// -// -// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) -func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Zeta", - Input: []tf.Input{ - x, q, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns a list of tensors with the same shapes and contents as the input // // tensors. @@ -18873,6 +18853,26 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D return op.Output(0) } +// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). +// +// The Hurwitz zeta function is defined as: +// +// +// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) +func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Zeta", + Input: []tf.Input{ + x, q, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Inverse fast Fourier transform. // // Computes the inverse 1-dimensional discrete Fourier transform over the @@ -22757,6 +22757,21 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output return op.Output(0) } +// Computes hyperbolic tangent of `x` element-wise. +func Tanh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Tanh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the maximum along segments of a tensor. // // Read @@ -22794,21 +22809,6 @@ func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf. return op.Output(0) } -// Computes hyperbolic tangent of `x` element-wise. 
-func Tanh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Tanh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a dataset that skips `count` elements from the `input_dataset`. // // Arguments: -- GitLab From 7d3bfc143a74d8e49f138841a07f7f4693b0a911 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 5 Oct 2018 20:07:12 -0700 Subject: [PATCH 043/411] Add the plumbing for an autograph flag to defun. Disabled and experimental for now. PiperOrigin-RevId: 216003028 --- tensorflow/python/eager/BUILD | 1 + tensorflow/python/eager/function.py | 61 +++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index d0c1a93118..cae809a7c3 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -251,6 +251,7 @@ py_library( "//tensorflow/python:gradients_impl", "//tensorflow/python:graph_to_function_def", "//tensorflow/python:util", + "//tensorflow/python/autograph", "//tensorflow/python/eager:context", "//tensorflow/python/eager:core", "//tensorflow/python/eager:execute", diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index f06148b5d2..bafe07de2b 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -31,6 +31,7 @@ import six from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import function_pb2 +from tensorflow.python import autograph from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import execute @@ -877,7 +878,8 @@ def func_graph_from_py_func(name, args, kwargs, signature=None, - func_graph=None): + func_graph=None, + experimental_autograph=False): """Returns a `FuncGraph` generated from `python_func`. 
Args: @@ -894,6 +896,8 @@ def func_graph_from_py_func(name, inputs. func_graph: Optional. An instance of FuncGraph. If provided, we will use this graph else a new one is built and returned. + experimental_autograph: whether to use autograph to compile `python_func`. + See https://www.tensorflow.org/guide/autograph for more information. Returns: A FuncGraph. @@ -939,7 +943,17 @@ def func_graph_from_py_func(name, this_tape = tape.push_new_tape() try: - func_outputs = python_func(*func_args, **func_kwargs) + if experimental_autograph: + func_outputs = autograph.converted_call( + python_func, + autograph.ConversionOptions( + verbose=True, + recursive=True, + force_conversion=False, + strip_decorators=(defun,), + arg_types={}), *func_args, **func_kwargs) + else: + func_outputs = python_func(*func_args, **func_kwargs) # invariant: `func_outputs` contains only Tensors and `None`s. func_outputs = nest.map_structure(convert, func_outputs) @@ -1035,7 +1049,8 @@ class PolymorphicFunction(object): python_function, name, input_signature=None, - attributes=None): + attributes=None, + experimental_autograph=False): """Initializes a polymorphic function. Args: @@ -1045,7 +1060,10 @@ class PolymorphicFunction(object): specifying the input signature of this function. If `None`, a separate function is instantiated for each inferred input signature. attributes: dict, extra keyword arguments that will be added as attribute - of the function. + of the function. + experimental_autograph: whether to use autograph to compile + `python_function`. See https://www.tensorflow.org/guide/autograph for + more information. 
Raises: ValueError: if `input_signature` is not None and the `python_function`'s @@ -1061,6 +1079,7 @@ class PolymorphicFunction(object): self._args_to_prepend = tuple() self._kwargs_to_include = {} self._name = name + self._experimental_autograph = experimental_autograph self._function_cache = collections.OrderedDict() self._function_attributes = attributes or {} @@ -1286,8 +1305,13 @@ class PolymorphicFunction(object): if graph_function is None: graph_function = Function( - func_graph_from_py_func(self._name, self._python_function, args, - kwargs, self._input_signature), + func_graph_from_py_func( + self._name, + self._python_function, + args, + kwargs, + self._input_signature, + experimental_autograph=self._experimental_autograph), self._function_attributes) self._function_cache[cache_key] = graph_function return graph_function, [ @@ -1348,7 +1372,7 @@ def _validate_signature(signature): "a possibly nested sequence of TensorSpec objects.") -def defun(func=None, input_signature=None): +def defun(func=None, input_signature=None, experimental_autograph=False): """Compiles a Python function into a callable TensorFlow graph. `defun` (short for "define function") trace-compiles a Python function @@ -1657,6 +1681,10 @@ def defun(func=None, input_signature=None): function is instantiated for each inferred input signature. If a signature is specified, every input to `func` must be a `Tensor`, and `func` cannot accept `**kwargs`. + experimental_autograph: Whether `func` should be compiled before + constructing the graph. See https://www.tensorflow.org/guide/autograph + for more information. + Returns: If `func` is not None, returns a callable that will execute the compiled @@ -1668,10 +1696,16 @@ def defun(func=None, input_signature=None): TypeError: If `input_signature` is neither `None` nor a sequence of `tf.contrib.eager.TensorSpec` objects. 
""" - return defun_with_attributes(func=func, input_signature=input_signature) + return defun_with_attributes( + func=func, + input_signature=input_signature, + experimental_autograph=experimental_autograph) -def defun_with_attributes(func=None, input_signature=None, attributes=None): +def defun_with_attributes(func=None, + input_signature=None, + attributes=None, + experimental_autograph=False): """Compiles a Python function into a callable TensorFlow graph. This function supports adding extra function attributes. See detailed @@ -1686,6 +1720,7 @@ def defun_with_attributes(func=None, input_signature=None, attributes=None): attributes. Currently only support primitive types as value, and only whitelisted attribute name is allowed. Unwhitelisted attribute name or unsupported value will result into ValueError. + experimental_autograph: same as defun()'s experimental_autograph. Returns: Same as the return value of defun, with attributes added to the function in @@ -1702,8 +1737,12 @@ def defun_with_attributes(func=None, input_signature=None, attributes=None): name = "function" return tf_decorator.make_decorator( function, - PolymorphicFunction(function, name, input_signature=input_signature, - attributes=attributes)) + PolymorphicFunction( + function, + name, + input_signature=input_signature, + attributes=attributes, + experimental_autograph=experimental_autograph)) # This code path is for the `foo = tfe.defun(foo, ...)` use case if func is not None: -- GitLab From fb92d456476c36210cea3b76393f584a306f092b Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 6 Oct 2018 02:01:17 -0700 Subject: [PATCH 044/411] compat: Update forward compatibility horizon to 2018-10-06 PiperOrigin-RevId: 216021117 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 8f4e8e0b98..d85fb00414 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 5) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 6) @tf_export("compat.forward_compatible") -- GitLab From 5c0a6bdfeb1848b0146a36706d921dde06ba160a Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 6 Oct 2018 10:04:16 -0700 Subject: [PATCH 045/411] [XLA] Add base and window dilation support to ReduceWindow PiperOrigin-RevId: 216041507 --- .../tf2xla/kernels/reduce_window_op.cc | 21 +++++++- .../compiler/tf2xla/kernels/scan_ops.cc | 3 +- tensorflow/compiler/tf2xla/ops/xla_ops.cc | 2 + tensorflow/compiler/tf2xla/python/xla.py | 6 +++ tensorflow/compiler/xla/client/xla_builder.cc | 15 ++++-- tensorflow/compiler/xla/client/xla_builder.h | 6 +++ .../xla/python/local_computation_builder.cc | 5 +- .../xla/python/local_computation_builder.h | 2 + tensorflow/compiler/xla/python/xla_client.py | 25 ++++++++- .../xla/service/algebraic_simplifier.cc | 6 +++ .../compiler/xla/service/cpu/ir_emitter.cc | 27 +++++++--- .../xla/service/gpu/elemental_ir_emitter.cc | 26 ++++++---- .../xla/service/hlo_evaluator_test.cc | 52 +++++++++++++++++++ .../xla/service/hlo_evaluator_typed_visitor.h | 13 ++++- .../compiler/xla/tests/reduce_window_test.cc | 12 ++++- 15 files changed, 191 insertions(+), 30 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc 
b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc index 8102faad28..8eee5b1299 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc @@ -40,10 +40,16 @@ class ReduceWindowOp : public XlaOpKernel { std::vector window_dimensions; std::vector window_strides; + std::vector base_dilations; + std::vector window_dilations; OP_REQUIRES_OK(context, context->ConstantInputAsIntVector( "window_dimensions", &window_dimensions)); OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("window_strides", &window_strides)); + OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("base_dilations", + &base_dilations)); + OP_REQUIRES_OK(context, context->ConstantInputAsIntVector( + "window_dilations", &window_dilations)); const int rank = input_shape.dims(); OP_REQUIRES(context, rank == window_dimensions.size(), @@ -56,6 +62,16 @@ class ReduceWindowOp : public XlaOpKernel { "The size of window_strides must be equal to the input " "rank (", window_strides.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == base_dilations.size(), + errors::InvalidArgument( + "The size of base_dilations must be equal to the input " + "rank (", + base_dilations.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == window_dilations.size(), + errors::InvalidArgument( + "The size of window_dilations must be equal to the input " + "rank (", + window_dilations.size(), " vs. ", rank, ")")); // Build the reducer function. 
XlaCompiler::Argument reducer_arg; @@ -102,7 +118,8 @@ class ReduceWindowOp : public XlaOpKernel { xla::XlaOp output = xla::ReduceWindowWithGeneralPadding( context->Input(0), context->Input(1), *reducer.computation, - window_dimensions, window_strides, padding); + window_dimensions, window_strides, base_dilations, window_dilations, + padding); context->SetOutput(0, output); } @@ -115,6 +132,8 @@ class ReduceWindowOp : public XlaOpKernel { REGISTER_XLA_OP(Name("XlaReduceWindow") .CompileTimeConstInput("window_dimensions") .CompileTimeConstInput("window_strides") + .CompileTimeConstInput("base_dilations") + .CompileTimeConstInput("window_dilations") .CompileTimeConstInput("padding"), ReduceWindowOp); diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc index ab094d7dd1..57afd608de 100644 --- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc @@ -104,7 +104,8 @@ class ScanOp : public XlaOpKernel { } auto output = xla::ReduceWindowWithGeneralPadding( XlaHelpers::ConvertElementType(builder, ctx->Input(0), dtype), init, - *reducer, window_dims, window_strides, padding); + *reducer, window_dims, window_strides, + /*base_dilations=*/{}, /*window_dilations=*/{}, padding); output = XlaHelpers::ConvertElementType(builder, output, ctx->input_type(0)); diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index 557911553d..bd2c0a5ee8 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -283,6 +283,8 @@ REGISTER_OP("XlaReduceWindow") .Input("init_value: T") .Input("window_dimensions: Tindices") .Input("window_strides: Tindices") + .Input("base_dilations: Tindices") + .Input("window_dilations: Tindices") .Input("padding: Tindices") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") diff --git a/tensorflow/compiler/tf2xla/python/xla.py 
b/tensorflow/compiler/tf2xla/python/xla.py index bc7924c371..5e86b5d8ec 100644 --- a/tensorflow/compiler/tf2xla/python/xla.py +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -320,6 +320,8 @@ def reduce_window(operand, reducer, window_dimensions, window_strides=None, + base_dilations=None, + window_dilations=None, padding=None, name=None): """Wraps the XLA ReduceWindow operator. @@ -343,12 +345,16 @@ def reduce_window(operand, A tensor that represents the output of the reduce_window operator. """ window_strides = window_strides or [1] * len(window_dimensions) + base_dilations = base_dilations or [1] * len(window_dimensions) + window_dilations = window_dilations or [1] * len(window_dimensions) padding = padding or [(0, 0)] * len(window_dimensions) return gen_xla_ops.xla_reduce_window( input=operand, init_value=init, window_dimensions=window_dimensions, window_strides=window_strides, + base_dilations=base_dilations, + window_dilations=window_dilations, padding=padding, computation=reducer, name=name) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index d196252db1..6b31831010 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -1789,9 +1789,9 @@ XlaOp XlaBuilder::ReduceWindow(const XlaOp& operand, const XlaOp& init_value, std::vector> padding_values = MakePadding(AsInt64Slice(operand_shape.dimensions()), window_dimensions, window_strides, padding); - return ReduceWindowWithGeneralPadding(operand, init_value, computation, - window_dimensions, window_strides, - padding_values); + return ReduceWindowWithGeneralPadding( + operand, init_value, computation, window_dimensions, window_strides, + /*base_dilations=*/{}, /*window_dilations=*/{}, padding_values); }); } @@ -1800,6 +1800,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, 
+ absl::Span window_dilations, absl::Span> padding) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; @@ -1810,7 +1812,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding( computation.GetProgramShape()); TF_ASSIGN_OR_RETURN(*instr.mutable_window(), MakeWindow(window_dimensions, window_strides, padding, - /*lhs_dilation=*/{}, /*rhs_dilation=*/{})); + /*lhs_dilation=*/base_dilations, + /*rhs_dilation=*/window_dilations)); TF_ASSIGN_OR_RETURN( *instr.mutable_shape(), ShapeInference::InferReduceWindowShape(operand_shape, init_shape, @@ -2800,10 +2803,12 @@ XlaOp ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding) { return operand.builder()->ReduceWindowWithGeneralPadding( operand, init_value, computation, window_dimensions, window_strides, - padding); + base_dilations, window_dilations, padding); } XlaOp CrossReplicaSum(const XlaOp& operand, diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index cd0d5ca5d3..2e14e47a35 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -671,6 +671,8 @@ class XlaBuilder { const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); // Returns the sum of the operand value within each subgroup of replicas. 
All @@ -1245,6 +1247,8 @@ class XlaBuilder { const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); friend XlaOp CrossReplicaSum(const XlaOp& operand, absl::Span replica_groups); @@ -1818,6 +1822,8 @@ XlaOp ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); // Returns the sum of the operand value within each subgroup of replicas. All diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index cd5fd33029..ffa336f304 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -532,10 +532,13 @@ LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding( const LocalComputation& local_computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding) { return xla::ReduceWindowWithGeneralPadding( operand.op(), init_value.op(), local_computation.computation(), - window_dimensions, window_strides, padding); + window_dimensions, window_strides, base_dilations, window_dilations, + padding); } LocalOp LocalComputationBuilder::RngNormal(const LocalOp& mu, diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index 2166bb6721..43332e0abd 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -278,6 +278,8 @@ class LocalComputationBuilder { const LocalComputation& local_computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, 
absl::Span > padding); LocalOp RngNormal(const LocalOp& mu, const LocalOp& sigma, diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index bb303c5678..f8197488fb 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -995,7 +995,30 @@ class ComputationBuilder(object): window_strides) return self._client.ReduceWindowWithGeneralPadding( operand, init_value, computation_to_apply.c_local_computation, - window_dimensions, window_strides, pads) + window_dimensions, window_strides, (), (), pads) + + def ReduceWindowWithGeneralPadding( + self, operand, init_value, computation_to_apply, window_dimensions, + window_strides, base_dilations, window_dilations, padding): + """Enqueues a windowed reduction operation onto the computation. + + Args: + operand: reduction operand (LocalOp). + init_value: reduction initial value (LocalOp). + computation_to_apply: a binary reduction function (Computation). + window_dimensions: dimensions of window (sequence of integers). + window_strides: strides for window (sequence of integers). + base_dilations: dilations for the base (sequence of integers). + window_dilations: dilations for window (sequence of integers). + padding: length-N array-like of pairs of integers of (low, high) padding. + + Returns: + A LocalOp representing the added ReduceWindow op. + """ + return self._client.ReduceWindowWithGeneralPadding( + operand, init_value, computation_to_apply.c_local_computation, + window_dimensions, window_strides, base_dilations, window_dilations, + padding) def RngNormal(self, mu, sigma, dims): """Enqueues an RngNormal operation onto the computation. 
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 75dae7a714..86d9dbea90 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2057,6 +2057,12 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( return Status::OK(); } + // Bail on dilation. + if (window_util::HasDilation(window)) { + VLOG(10) << "Not folding pad into reduce-window as there is dilation."; + return Status::OK(); + } + VLOG(10) << "Considering folding Pad: " << pad->ToString() << "\ninto reduce-window: " << reduce_window->ToString() << (convert != nullptr diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index a70abb117a..b2abdb39a5 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -688,8 +688,25 @@ StatusOr IrEmitter::EmitTargetElementLoopBodyForReduceWindow( for (size_t i = 0; i < index.size(); ++i) { llvm::Value* strided_index = NSWMul(index[i], b_.getInt64(window.dimensions(i).stride())); - input_index[i] = NSWSub(NSWAdd(strided_index, window_index[i]), - b_.getInt64(window.dimensions(i).padding_low())); + input_index[i] = NSWSub( + NSWAdd(strided_index, + NSWMul(window_index[i], + b_.getInt64(window.dimensions(i).window_dilation()))), + b_.getInt64(window.dimensions(i).padding_low())); + + // We need to verify that we are not in the dilated base area. + llvm::Value* dilation_condition = ICmpEQ( + SRem(input_index[i], b_.getInt64(window.dimensions(i).base_dilation())), + b_.getInt64(0)); + if (in_bounds_condition == nullptr) { + in_bounds_condition = dilation_condition; + } else { + in_bounds_condition = And(in_bounds_condition, dilation_condition); + } + + // Apply base dilation to the index. 
+ input_index[i] = + SDiv(input_index[i], b_.getInt64(window.dimensions(i).base_dilation())); // We need to check if 0 <= input_index[i] < bound, as otherwise we are in // the padding so that we can skip the computation. That is equivalent to @@ -728,12 +745,6 @@ Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) { /*operands=*/{reduce_window->operand(0)}, /*supported_types=*/{F32, BF16, S32, F16})); - // TODO(b/31410564): Implement dilation for reduce-window. - if (window_util::HasDilation(reduce_window->window())) { - return Unimplemented( - "Dilation for ReduceWindow is not implemented on CPU."); - } - // Pseudo code for reduce window: // // for (coordinates O in the output) diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index c1aaa4bf04..6dcdaf1cfe 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -358,13 +358,6 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( const HloInstruction* operand = hlo->operand(0); const Window& window = hlo->window(); - // TODO(b/31410564): Implement dilation for reduce-window. - if (window_util::HasDilation(window)) { - return Unimplemented( - "Dilation for reduce-window not implemented on GPU. 
" - "See b/31410564."); - } - PrimitiveType operand_element_type = operand->shape().element_type(); llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry( llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), @@ -397,9 +390,24 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( for (size_t i = 0; i < index.size(); ++i) { llvm::Value* stridden_index = NSWMul( index[i], index_typed_const(window.dimensions(i).stride())); + input_index[i] = NSWSub( + NSWAdd(stridden_index, + NSWMul(window_index[i], + index_typed_const( + window.dimensions(i).window_dilation()))), + index_typed_const(window.dimensions(i).padding_low())); + + // We need to verify that we are not in the dilated base area. + llvm::Value* dilation_condition = ICmpEQ( + SRem(input_index[i], + index_typed_const(window.dimensions(i).base_dilation())), + index_typed_const(0)); + in_bounds = And(in_bounds, dilation_condition); + + // Apply base dilation to the index. input_index[i] = - NSWSub(NSWAdd(stridden_index, window_index[i]), - index_typed_const(window.dimensions(i).padding_low())); + SDiv(input_index[i], + index_typed_const(window.dimensions(i).base_dilation())); // We must check whether 0 ≤ input_index[i] < bound, as otherwise // we are in the pad and so can skip the computation. 
This diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index cee11a8a21..608a42bb60 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -1463,6 +1463,58 @@ TEST_P(HloEvaluatorTest, ReduceWindowMax) { EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); } +TEST_P(HloEvaluatorTest, ReduceWindowMaxWindowDilation) { + HloComputation::Builder b(TestName()); + + // arg: + // f32[3,3] { + // { 1, 2, 3 }, + // { 5, 6, 7 }, + // { 9, 10, 11 }, + // } + auto arg_array = absl::make_unique>(3, 3); + arg_array->FillUnique(1.0f); + auto arg_literal = LiteralUtil::CreateR2FromArray2D(*arg_array); + + HloInstruction* arg_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal))); + + auto init_value = b.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(0.f))); + + HloComputation::Builder max_computation("max"); + Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + auto param_lhs = max_computation.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "lhs")); + auto param_rhs = max_computation.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "rhs")); + max_computation.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kMaximum, param_lhs, param_rhs)); + auto max_func = module().AddEmbeddedComputation(max_computation.Build()); + + Window window; + WindowDimension dim; + dim.set_size(2); + dim.set_stride(1); + dim.set_padding_low(0); + dim.set_padding_high(0); + dim.set_window_dilation(2); + dim.set_base_dilation(1); + *window.add_dimensions() = dim; + *window.add_dimensions() = dim; + + Shape shape = ShapeUtil::MakeShape(F32, {1, 1}); + b.AddInstruction(HloInstruction::CreateReduceWindow( + shape, arg_instruction, init_value, window, max_func)); + + module().AddEntryComputation(b.Build()); + + Literal result = Evaluate(); + + 
auto expected = LiteralUtil::CreateR2({{11}}); + EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); +} + TEST_P(HloEvaluatorTest, ReduceWindowAdd) { HloComputation::Builder b(TestName()); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index b2d12c94b8..a450dc6ff5 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -2613,8 +2613,17 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { std::vector base_index(rank); bool out_of_bound = false; for (int64 i = 0; i < rank; ++i) { - base_index[i] = window_count_index[i] * window.dimensions(i).stride() + - window_index[i] - window.dimensions(i).padding_low(); + base_index[i] = + window_count_index[i] * window.dimensions(i).stride() + + window_index[i] * window.dimensions(i).window_dilation() - + window.dimensions(i).padding_low(); + // We are not in the base area if the dilation placed us out of bounds. + if (base_index[i] % window.dimensions(i).base_dilation() != 0) { + out_of_bound = true; + break; + } + // Apply the dilation to the base area. 
+ base_index[i] /= window.dimensions(i).base_dilation(); if (base_index[i] < 0 || base_index[i] >= base_shape.dimensions(i)) { out_of_bound = true; break; diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index c25ccafaf8..22fe4a2670 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -638,6 +638,8 @@ class R4ReduceWindowTest : public ReduceWindowTestBase, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, /*padding=*/padding); CHECK(reducer == kAdd || reducer == kMax); @@ -1158,7 +1160,10 @@ class R2ReduceWindowTest : public ReduceWindowTestBase, /*init_value=*/init_value, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, - /*window_strides=*/param.strides, /*padding=*/padding); + /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, + /*padding=*/padding); auto reduce_func = param.reducer == kAdd ? +[](float a, float b) { return a + b; } @@ -1369,7 +1374,10 @@ TEST_P(R1ReduceWindowTest, DoIt) { /*init_value=*/init_value, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, - /*window_strides=*/param.strides, /*padding=*/padding); + /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, + /*padding=*/padding); auto reduce_func = param.reducer == kAdd ? +[](float a, float b) { return a + b; } -- GitLab From e93a18954689b6d522560f5273f6d3320d545b2e Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Sat, 6 Oct 2018 13:49:25 -0700 Subject: [PATCH 046/411] Mark tensorflow/contrib/tpu:datasets_test flaky It fails 1/1000 runs in OSS builds. 
PiperOrigin-RevId: 216050192 --- tensorflow/contrib/tpu/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 10ed1c2891..8c36d5a297 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -302,6 +302,7 @@ tf_py_test( "//tensorflow/python:client_testlib", ":datasets", ], + flaky = 1, # TODO(b/117363808): fails 1/1000 OSS runs grpc_enabled = True, ) -- GitLab From 7fa6a6b42bc9d562e2b1cc765ca78d281b51f734 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 6 Oct 2018 21:00:57 -0700 Subject: [PATCH 047/411] Add SequenceLSTMOptions to schema to decouple the sequential Op from the LSTM. PiperOrigin-RevId: 216066634 --- tensorflow/contrib/lite/c/builtin_op_data.h | 7 + .../lite/core/api/flatbuffer_conversions.cc | 15 +- .../kernels/unidirectional_sequence_lstm.cc | 14 +- .../unidirectional_sequence_lstm_test.cc | 11 +- tensorflow/contrib/lite/schema/schema.fbs | 8 + .../contrib/lite/schema/schema_generated.h | 162 +++++++++++++++++- 6 files changed, 205 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h index 44daf7adaa..1e65c3cee2 100644 --- a/tensorflow/contrib/lite/c/builtin_op_data.h +++ b/tensorflow/contrib/lite/c/builtin_op_data.h @@ -186,6 +186,13 @@ typedef struct { TfLiteLSTMKernelType kernel_type; } TfLiteLSTMParams; +typedef struct { + // Parameters for the LSTM kernel. + TfLiteFusedActivation activation; + float cell_clip; + float proj_clip; +} TfLiteUnidirectionalSequenceLSTMParams; + typedef struct { // Parameters for the LSTM kernel. 
TfLiteFusedActivation activation; diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc index eac7db9a88..b092e5ee54 100644 --- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc @@ -371,7 +371,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = reinterpret_cast(params); break; } - case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: case BuiltinOperator_LSTM: { auto params = allocator->AllocatePOD(); if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) { @@ -391,6 +390,20 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = reinterpret_cast(params); break; } + case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: { + auto* params = + allocator->AllocatePOD(); + if (auto* seq_lstm_params = + op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) { + params->activation = + parse_activation(seq_lstm_params->fused_activation_function()); + params->cell_clip = seq_lstm_params->cell_clip(); + params->proj_clip = seq_lstm_params->proj_clip(); + } + *builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: { auto params = allocator->AllocatePOD(); diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index ec9cf38b83..89d57e4599 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -431,7 +431,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->builtin_data); + const auto* params = + reinterpret_cast( + node->builtin_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); 
const TfLiteTensor* input_to_input_weights = @@ -482,6 +484,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + // Copy out the LSTM specific params so they can be passed in the function. + TfLiteLSTMParams lstm_params; + lstm_params.activation = params->activation; + lstm_params.cell_clip = params->cell_clip; + lstm_params.proj_clip = params->proj_clip; + switch (input_to_output_weights->type) { case kTfLiteFloat32: { return lstm_eval::EvalFloat( @@ -496,7 +504,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { /*aux_input_to_cell_weights=*/nullptr, /*aux_input_to_output_weights=*/nullptr, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, projection_weights, - projection_bias, params, /*forward_sequence=*/true, + projection_bias, &lstm_params, /*forward_sequence=*/true, /*output_offset=*/0, scratch_buffer, activation_state, cell_state, output); } @@ -523,7 +531,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { /*aux_input_to_cell_weights=*/nullptr, /*aux_input_to_output_weights=*/nullptr, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, projection_weights, - projection_bias, params, /*forward_sequence=*/true, + projection_bias, &lstm_params, /*forward_sequence=*/true, /*output_offset=*/0, scratch_buffer, scaling_factors, prod_scaling_factors, recovered_cell_weights, input_quantized, /*aux_input_quantized=*/nullptr, activation_state_quantized, diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc index cd3aac0532..c97b0fdd61 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc @@ -110,11 +110,12 @@ class UnidirectionalLSTMOpModel : public SingleOpModel { output_ = AddOutput(TensorType_FLOAT32); - 
SetBuiltinOp(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, - BuiltinOptions_LSTMOptions, - CreateLSTMOptions(builder_, ActivationFunctionType_TANH, - cell_clip, proj_clip) - .Union()); + SetBuiltinOp( + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOptions_UnidirectionalSequenceLSTMOptions, + CreateUnidirectionalSequenceLSTMOptions( + builder_, ActivationFunctionType_TANH, cell_clip, proj_clip) + .Union()); BuildInterpreter(input_shapes); } diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index ff8430827c..cb7a282743 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -250,6 +250,7 @@ union BuiltinOptions { FillOptions, BidirectionalSequenceLSTMOptions, BidirectionalSequenceRNNOptions, + UnidirectionalSequenceLSTMOptions, } enum Padding : byte { SAME, VALID } @@ -394,6 +395,13 @@ table LSTMOptions { kernel_type: LSTMKernelType = FULL; } +// An implementation of TensorFlow dynamic_rnn with LSTMCell. 
+table UnidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping +} + table BidirectionalSequenceLSTMOptions { fused_activation_function:ActivationFunctionType; cell_clip: float; // Optional, 0.0 means no clipping diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index f3cb113c9c..e7b7a59def 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -79,6 +79,9 @@ struct LocalResponseNormalizationOptionsT; struct LSTMOptions; struct LSTMOptionsT; +struct UnidirectionalSequenceLSTMOptions; +struct UnidirectionalSequenceLSTMOptionsT; + struct BidirectionalSequenceLSTMOptions; struct BidirectionalSequenceLSTMOptionsT; @@ -681,11 +684,12 @@ enum BuiltinOptions { BuiltinOptions_FillOptions = 68, BuiltinOptions_BidirectionalSequenceLSTMOptions = 69, BuiltinOptions_BidirectionalSequenceRNNOptions = 70, + BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_BidirectionalSequenceRNNOptions + BuiltinOptions_MAX = BuiltinOptions_UnidirectionalSequenceLSTMOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -757,7 +761,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] { BuiltinOptions_ZerosLikeOptions, BuiltinOptions_FillOptions, BuiltinOptions_BidirectionalSequenceLSTMOptions, - BuiltinOptions_BidirectionalSequenceRNNOptions + BuiltinOptions_BidirectionalSequenceRNNOptions, + BuiltinOptions_UnidirectionalSequenceLSTMOptions }; return values; } @@ -835,6 +840,7 @@ inline const char * const *EnumNamesBuiltinOptions() { "FillOptions", 
"BidirectionalSequenceLSTMOptions", "BidirectionalSequenceRNNOptions", + "UnidirectionalSequenceLSTMOptions", nullptr }; return names; @@ -1129,6 +1135,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1720,6 +1730,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_BidirectionalSequenceRNNOptions ? reinterpret_cast(value) : nullptr; } + UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() { + return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? + reinterpret_cast(value) : nullptr; + } + const UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() const { + return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? + reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -3469,6 +3487,84 @@ inline flatbuffers::Offset CreateLSTMOptions( flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable { + typedef UnidirectionalSequenceLSTMOptions TableType; + ActivationFunctionType fused_activation_function; + float cell_clip; + float proj_clip; + UnidirectionalSequenceLSTMOptionsT() + : fused_activation_function(ActivationFunctionType_NONE), + cell_clip(0.0f), + proj_clip(0.0f) { + } +}; + +struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef UnidirectionalSequenceLSTMOptionsT NativeTableType; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8 + }; + 
ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField(verifier, VT_CELL_CLIP) && + VerifyField(verifier, VT_PROJ_CLIP) && + verifier.EndTable(); + } + UnidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UnidirectionalSequenceLSTMOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + UnidirectionalSequenceLSTMOptionsBuilder &operator=(const UnidirectionalSequenceLSTMOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset 
CreateUnidirectionalSequenceLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f) { + UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable { typedef BidirectionalSequenceLSTMOptions TableType; ActivationFunctionType fused_activation_function; @@ -6488,6 +6584,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const { return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast(builtin_options()) : nullptr; } + const UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const { + return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? 
static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -6799,6 +6898,10 @@ template<> inline const BidirectionalSequenceRNNOptions *Operator::builtin_optio return builtin_options_as_BidirectionalSequenceRNNOptions(); } +template<> inline const UnidirectionalSequenceLSTMOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnidirectionalSequenceLSTMOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -7809,6 +7912,38 @@ inline flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBuffe _kernel_type); } +inline UnidirectionalSequenceLSTMOptionsT *UnidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new UnidirectionalSequenceLSTMOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; + { auto _e = cell_clip(); _o->cell_clip = _e; }; + { auto _e = proj_clip(); _o->proj_clip = _e; }; +} + +inline flatbuffers::Offset UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUnidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, 
_rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _cell_clip = _o->cell_clip; + auto _proj_clip = _o->proj_clip; + return tflite::CreateUnidirectionalSequenceLSTMOptions( + _fbb, + _fused_activation_function, + _cell_clip, + _proj_clip); +} + inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new BidirectionalSequenceLSTMOptionsT(); UnPackTo(_o, _resolver); @@ -9620,6 +9755,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -9918,6 +10057,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -10204,6 +10347,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(value); + return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -10490,6 +10637,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new BidirectionalSequenceRNNOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + value = new UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -10847,6 +10998,11 @@ inline void 
BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; -- GitLab From 367f7d651f19c5b111ea0292243eab81fb4058c7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 7 Oct 2018 02:01:04 -0700 Subject: [PATCH 048/411] compat: Update forward compatibility horizon to 2018-10-07 PiperOrigin-RevId: 216079665 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index d85fb00414..ee56480b00 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 6) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 7) @tf_export("compat.forward_compatible") -- GitLab From 393a13c1b1a7d51b0871a6d4b3d3413d8e1765bf Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 8 Oct 2018 02:03:43 -0700 Subject: [PATCH 049/411] compat: Update forward compatibility horizon to 2018-10-08 PiperOrigin-RevId: 216151605 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index ee56480b00..349c84e13c 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 7) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 8) @tf_export("compat.forward_compatible") -- GitLab From 3bdf3c592472c2b54c513417de8d9b538d3f6078 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 08:08:31 -0700 Subject: [PATCH 050/411] Make ExecutorState preserve the thread context. PiperOrigin-RevId: 216187878 --- tensorflow/core/common_runtime/executor.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 2c48084cab..40ec1502da 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -54,6 +54,7 @@ limitations under the License. 
#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/context.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" @@ -1240,6 +1241,7 @@ class ExecutorState { StepStatsCollectorInterface* const stats_collector_; const tracing::TraceCollector* const trace_collector_; const tracing::EventCollector* const event_collector_; + Context context_; // QUESTION: Make it a checkpoint::TensorSliceReaderCacheWrapper // instead of a pointer? (avoids having to delete). @@ -1367,6 +1369,7 @@ ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl) trace_collector_(tracing::GetTraceCollector()), event_collector_( tracing::GetEventCollector(tracing::EventCategory::kCompute)), + context_(ContextKind::kThread), slice_reader_cache_(new checkpoint::TensorSliceReaderCacheWrapper), call_frame_(args.call_frame), impl_(impl), @@ -1586,6 +1589,7 @@ bool MightTrace(const NodeItem& item, } void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { + WithContext wc(context_); const GraphView& gview = impl_->gview_; TaggedNodeSeq ready; TaggedNodeReadyQueue inline_ready; -- GitLab From 53961cc2f16dea9d9b2286950c1e4d4c0a3743c5 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 8 Oct 2018 08:22:48 -0700 Subject: [PATCH 051/411] Improve const correctness of HloDomainMap PiperOrigin-RevId: 216189458 --- tensorflow/compiler/xla/service/hlo_domain_map.cc | 12 +++++++----- tensorflow/compiler/xla/service/hlo_domain_map.h | 14 +++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc index 6ca1255ede..c6d02f9f67 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_map.cc +++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc @@ -42,18 +42,19 @@ namespace xla { return std::move(domain_map); } -bool HloDomainMap::InSameDomain(HloInstruction* instruction1, - HloInstruction* instruction2) const { +bool HloDomainMap::InSameDomain(const HloInstruction* instruction1, + const HloInstruction* instruction2) const { int64 domain_id1 = GetDomainId(instruction1); int64 domain_id2 = GetDomainId(instruction2); return domain_id1 >= 0 && domain_id1 == domain_id2; } -int64 HloDomainMap::GetDomainId(HloInstruction* instruction) const { +int64 HloDomainMap::GetDomainId(const HloInstruction* instruction) const { return FindOrDefault(instruction_to_domain_, instruction, -1); } -int64 HloDomainMap::GetDomainMetadataId(HloInstruction* instruction) const { +int64 HloDomainMap::GetDomainMetadataId( + const HloInstruction* instruction) const { return FindOrDie(domain_metadata_id_, instruction); } @@ -200,7 +201,8 @@ StatusOr> HloDomainMap::CreateDomain( return std::move(domain); } -bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const { +bool HloDomainMap::IsDomainInstruction( + const HloInstruction* instruction) const { if (instruction->opcode() != HloOpcode::kDomain) { return false; } diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h index c8d581b746..bce7d1aa7c 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_map.h +++ 
b/tensorflow/compiler/xla/service/hlo_domain_map.h @@ -58,21 +58,21 @@ class HloDomainMap { } // Checks whether two instructions are within the same domain. - bool InSameDomain(HloInstruction* instruction1, - HloInstruction* instruction2) const; + bool InSameDomain(const HloInstruction* instruction1, + const HloInstruction* instruction2) const; // Checks whether instruction is a kDomain instruction of the kind we are // currently processing. - bool IsDomainInstruction(HloInstruction* instruction) const; + bool IsDomainInstruction(const HloInstruction* instruction) const; // Retrieves the domain identifier of the instruction, or -1 in case // instruction is not found within any domain. - int64 GetDomainId(HloInstruction* instruction) const; + int64 GetDomainId(const HloInstruction* instruction) const; // Returns the unique id of the domain metadata for the domain the given // instruction belongs to. The given instruction must not be a kDomain // instruction since each domain instruction is associated with 2 domains. - int64 GetDomainMetadataId(HloInstruction* instruction) const; + int64 GetDomainMetadataId(const HloInstruction* instruction) const; private: // Map used for representing instruction ordering, i.e. @@ -119,8 +119,8 @@ class HloDomainMap { string domain_kind_; std::vector> instruction_domains_; - absl::flat_hash_map instruction_to_domain_; - absl::flat_hash_map domain_metadata_id_; + absl::flat_hash_map instruction_to_domain_; + absl::flat_hash_map domain_metadata_id_; }; } // namespace xla -- GitLab From 75f57a8b7836a1ed3cda8ba81c88f6caf15cf0c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 08:35:14 -0700 Subject: [PATCH 052/411] Remove Dims from types.h, create build structure. 
PiperOrigin-RevId: 216191084 --- .../contrib/lite/kernels/internal/BUILD | 16 ++++++++++++ .../lite/kernels/internal/legacy_types.h | 26 +++++++++++++++++++ .../internal/reference/legacy_reference_ops.h | 7 ++++- .../internal/reference/reference_ops.h | 5 ---- 4 files changed, 48 insertions(+), 6 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/internal/legacy_types.h diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index afb5ec05df..5c9ca6e910 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -49,6 +49,20 @@ cc_library( ], ) +cc_library( + name = "legacy_types", + srcs = [], + hdrs = [ + "compatibility.h", + "legacy_types.h", + "types.h", + ], + deps = [ + "//tensorflow/contrib/lite/kernels:op_macros", + "@com_google_absl//absl/base:core_headers", + ], +) + config_setting( name = "arm", values = { @@ -198,6 +212,7 @@ cc_library( ":strided_slice_logic", ":tensor_utils", ":types", + ":legacy_types", ":legacy_reference_base", ":round", "//third_party/eigen3", @@ -336,6 +351,7 @@ cc_library( ":quantization_util", ":round", ":strided_slice_logic", + ":legacy_types", ":types", "@gemmlowp", "//tensorflow/contrib/lite/c:c_api_internal", diff --git a/tensorflow/contrib/lite/kernels/internal/legacy_types.h b/tensorflow/contrib/lite/kernels/internal/legacy_types.h new file mode 100644 index 0000000000..2e4d3137f5 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/legacy_types.h @@ -0,0 +1,26 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_ + +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { + +// TODO(b/116772710): Insert legacy Dims<> code in here. + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h index be99240b1f..c8b64cfd96 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h @@ -19,10 +19,10 @@ limitations under the License. 
#include #include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/legacy_types.h" #include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h" #include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h" #include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" -#include "tensorflow/contrib/lite/kernels/internal/types.h" namespace tflite { @@ -30,6 +30,11 @@ namespace reference_ops { static constexpr int kDepthwiseReverseShift = -1; +inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) { + shape->BuildFrom( + {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); +} + inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, const float* filter_data, const Dims<4>& filter_dims, const float* bias_data, const Dims<4>& bias_dims, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 59f17ae854..19d23fa80b 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -100,11 +100,6 @@ gemmlowp::FixedPoint SaturatingSub( namespace reference_ops { -inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) { - shape->BuildFrom( - {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); -} - template int CountLeadingZeros(T integer_input) { static_assert(std::is_unsigned::value, -- GitLab From 5f308cb408eb46ec9af0546be6b9ae1d5166b185 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 09:06:04 -0700 Subject: [PATCH 053/411] Optimize PinToHostOptimizer by adding cache, also add PinToHostOptimizer to benchmarks. 
original runtime: 4.83492736816 secs w/ cache runtime: 2.19033999443 secs PiperOrigin-RevId: 216195286 --- tensorflow/core/grappler/op_types.cc | 22 ++- .../optimizers/pin_to_host_optimizer.cc | 162 ++++++++++++------ .../optimizers/pin_to_host_optimizer.h | 4 +- .../optimizers/pin_to_host_optimizer_test.cc | 76 +++++--- 4 files changed, 179 insertions(+), 85 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 1b5a215987..cbf5c8e038 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -102,15 +102,19 @@ bool IsConjugateTranspose(const NodeDef& node) { } bool IsControlFlow(const NodeDef& node) { - // clang-format off - return node.op() == "ControlTrigger" || - node.op() == "Enter" || - node.op() == "Exit" || - node.op() == "LoopCond" || - node.op() == "Merge" || - node.op() == "NextIteration" || - node.op() == "Switch"; - // clang-format on + // TODO(williamchan): Add a microbenchmark to compare FlatSet vs. iterative + // string comparison. + static const gtl::FlatSet* const kControFlowOps = + CHECK_NOTNULL((new gtl::FlatSet{ + "ControlTrigger", + "Enter", + "Exit", + "LoopCond", + "Merge", + "NextIteration", + "Switch", + })); + return kControFlowOps->count(node.op()) > 0; } bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; } diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc index 8ed4271fa4..29a3b2b74c 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc @@ -25,16 +25,29 @@ limitations under the License. 
#include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { namespace grappler { + namespace internal { +namespace { // TODO(williamchan): Change this constant to be something smarter, maybe // dynamically determined. constexpr int64 kTensorMaxSize = 64; +struct OpDevicePortHasher { + std::size_t operator()(const std::tuple& x) const { + uint64 code = Hash64Combine(Hash64(std::get<0>(x)), Hash64(std::get<1>(x))); + + return Hash64Combine(code, hash()(std::get<2>(x))); + } +}; +using OpDevicePortOnHostMap = + gtl::FlatMap, bool, OpDevicePortHasher>; + // All the nodes that should be blacklisted and not swapped. bool IsBlacklisted(const NodeDef& node) { return @@ -82,10 +95,10 @@ Status TryFindKernelDef(const std::vector& devices, // Checks if a node's output port is host friendly. // Roughly this means checking if the output port is on Host memory. -Status IsNodeOutputPortHostFriendly(const GraphView& graph, - GraphProperties* properties, - const NodeDef& node, int port_id, - bool* is_candidate) { +Status IsNodeOutputPortHostFriendly( + const GraphView& graph, GraphProperties* properties, const NodeDef& node, + int port_id, OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache, + bool* is_candidate) { *is_candidate = false; // Make sure we are not a blacklisted op. 
@@ -117,7 +130,8 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph, for (const auto& fanin : graph.GetFanins(node, false)) { bool fanin_candidate = false; TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly( - graph, properties, *fanin.node, fanin.port_id, &fanin_candidate)); + graph, properties, *fanin.node, fanin.port_id, + op_device_outport_pinned_to_host_cache, &fanin_candidate)); if (!fanin_candidate) { return Status::OK(); } @@ -132,11 +146,22 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph, return Status::OK(); } + // Check `op_device_outport_pinned_to_host_cache` for our + // {op, device, port_id} combo to see if the arg is pinned on Host. + const std::tuple cache_key(node.op(), node.device(), + port_id); + auto it = op_device_outport_pinned_to_host_cache->find(cache_key); + if (it != op_device_outport_pinned_to_host_cache->end()) { + *is_candidate = it->second; + return Status::OK(); + } + // Check if op's output port is pinned to HostMemory. const OpDef* op = nullptr; Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op); if (!s.ok()) { LOG(WARNING) << "Could not find OpDef for : " << node.op(); + op_device_outport_pinned_to_host_cache->emplace(cache_key, false); return Status::OK(); } @@ -146,6 +171,7 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph, LOG(WARNING) << "Invalid port: " << port_id << "!\n" << node.DebugString() << "\n" << op->DebugString(); + op_device_outport_pinned_to_host_cache->emplace(cache_key, false); return Status::OK(); } @@ -155,6 +181,7 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph, &kernel); if (!s.ok()) { LOG(INFO) << "Could not find KernelDef for: " << node.op(); + op_device_outport_pinned_to_host_cache->emplace(cache_key, false); return Status::OK(); } @@ -166,22 +193,35 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph, } } + op_device_outport_pinned_to_host_cache->emplace(cache_key, *is_candidate); + return Status::OK(); } // Checks if a node's input port 
is Host friendly. // Roughly this means checking if the input port is on Host memory. -bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) { +bool IsNodeInputPortHostFriendly( + const NodeDef& node, int port_id, + OpDevicePortOnHostMap* op_device_inport_pinned_to_host_cache) { // If node is on Host, assume its inputs are Host friendly. if (str_util::StrContains(node.device(), DEVICE_CPU)) { return true; } + // Check `op_device_inport_pinned_to_host_cache` for our + // {op, device, port_id} combo to see if the arg is pinned on Host. + std::tuple cache_key(node.op(), node.device(), port_id); + auto it = op_device_inport_pinned_to_host_cache->find(cache_key); + if (it != op_device_inport_pinned_to_host_cache->end()) { + return it->second; + } + // Check if op's input port is pinned to HostMemory. const OpDef* op = nullptr; Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op); if (!s.ok()) { LOG(WARNING) << "Could not find OpDef for : " << node.op(); + op_device_inport_pinned_to_host_cache->emplace(cache_key, false); return false; } const int input_arg_id = OpInputPortIdToArgId(node, *op, port_id); @@ -192,16 +232,20 @@ bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) { {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node, &kernel); if (!s.ok()) { LOG(INFO) << "Could not find KernelDef for: " << node.op(); + op_device_inport_pinned_to_host_cache->emplace(cache_key, false); return false; } // Check if the input_arg is pinned to Host. for (const string& host_memory_arg : kernel->host_memory_arg()) { if (op->input_arg(input_arg_id).name() == host_memory_arg) { + op_device_inport_pinned_to_host_cache->emplace(cache_key, true); return true; } } + op_device_inport_pinned_to_host_cache->emplace(cache_key, false); + return false; } @@ -211,18 +255,20 @@ bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) { // 2] Check if node can run on Host. 
// 3] Check all input/outputs are Host "friendly" (atm, friendly means small, // ints, and pinned to Host). -Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties, - const NodeDef& node, bool* is_candidate) { +Status IsNodeHostCandidate( + const GraphView& graph, GraphProperties* properties, const NodeDef& node, + OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache, + bool* is_candidate) { *is_candidate = false; - // Check if node already on CPU. - if (str_util::StrContains(node.device(), DEVICE_CPU)) { - *is_candidate = true; + // Skip these node types. + if (IsBlacklisted(node)) { return Status::OK(); } - // Skip these node types. - if (IsBlacklisted(node)) { + // Check if node already on CPU. + if (str_util::StrContains(node.device(), DEVICE_CPU)) { + *is_candidate = true; return Status::OK(); } @@ -232,17 +278,6 @@ Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties, return Status::OK(); } - // Check all inputs are Host friendly. - for (const GraphView::OutputPort& fanin : - graph.GetFanins(node, /*include_controlling_nodes=*/false)) { - bool fanin_candidate = false; - TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly( - graph, properties, *fanin.node, fanin.port_id, &fanin_candidate)); - if (!fanin_candidate) { - return Status::OK(); - } - } - // Check all outputs are Host friendly. if (!properties->has_properties()) { // This is an expensive call, call it lazily. @@ -255,16 +290,42 @@ Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties, } } + // Check all inputs are Host friendly. 
+ for (const GraphView::OutputPort& fanin : + graph.GetFanins(node, /*include_controlling_nodes=*/false)) { + bool fanin_candidate = false; + TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly( + graph, properties, *fanin.node, fanin.port_id, + op_device_outport_pinned_to_host_cache, &fanin_candidate)); + if (!fanin_candidate) { + return Status::OK(); + } + } + *is_candidate = true; return Status::OK(); } -string TryFindHostDevice(const gtl::FlatSet& devices, - bool has_device_cpu, const string& device) { +bool IsTPUGraphDef(const GraphDef& def) { + for (const auto& node : def.node()) { + if (node.op() == "TPUCompile" || node.op() == "TPUExecute" || + node.op() == "TPUPartitionedCall") { + return true; + } + } + return false; +} +} // end namespace + +// Tries to swap `device` to a Host device from `devices`. Returns true iff +// there was a swap. +bool TrySwapToHostDevice(const gtl::FlatSet& devices, + bool has_device_cpu, string* device) { // Force this node onto the CPU. - if (device.empty() && has_device_cpu) { - return "/device:CPU:0"; - } else if (str_util::StrContains(device, DEVICE_GPU)) { + if (device->empty() && has_device_cpu) { + *device = "/device:CPU:0"; + return true; + } else if (str_util::StrContains(*device, DEVICE_GPU)) { // Sometimes the cluster can have: // devices = {"/device:CPU:0", "/device:XLA_GPU:0"} // and we need to handle them properly. @@ -272,27 +333,19 @@ string TryFindHostDevice(const gtl::FlatSet& devices, {std::pair("GPU", "CPU:0"), std::pair("/device", "/device:CPU:0")}) { const string device_host = - strings::StrCat(device.substr(0, device.rfind(device_match.first)), + strings::StrCat(device->substr(0, device->rfind(device_match.first)), device_match.second); if (devices.find(device_host) != devices.end()) { - return device_host; + *device = device_host; + return true; } } } - // We couldn't find an appropriate Host device, return original device. 
- return device; -} - -bool IsTPUGraphDef(const GraphDef& def) { - for (const auto& node : def.node()) { - if (node.op() == "TPUCompile" || node.op() == "TPUExecute" || - node.op() == "TPUPartitionedCall") { - return true; - } - } + // We couldn't find an appropriate Host device, return false. return false; } + } // end namespace internal Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, @@ -324,20 +377,26 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // All the Const nodes, and their original devices in topological order. std::vector> const_nodes; + // Cache to map {op, device, port} -> bool on whether it is pinned to host. + internal::OpDevicePortOnHostMap op_device_outport_pinned_to_host_cache; + internal::OpDevicePortOnHostMap op_device_inport_pinned_to_host_cache; + for (auto& node : *optimized_graph->mutable_node()) { bool is_candidate = false; - TF_RETURN_IF_ERROR( - internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate)); + TF_RETURN_IF_ERROR(internal::IsNodeHostCandidate( + graph, &properties, node, &op_device_outport_pinned_to_host_cache, + &is_candidate)); if (!is_candidate) { continue; } - if (IsConstant(node)) { - const_nodes.emplace_back(&node, node.device()); + const string original_device = node.device(); + const bool swapped = internal::TrySwapToHostDevice(devices, has_device_cpu, + node.mutable_device()); + // Keep track of all Const nodes that we swapped. + if (swapped && IsConstant(node)) { + const_nodes.emplace_back(&node, original_device); } - // Try and swap the device to Host. - node.set_device( - internal::TryFindHostDevice(devices, has_device_cpu, node.device())); } // Traverse all `const_nodes`, and map them back to GPU greedily. @@ -349,8 +408,9 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // this node back onto the original device. 
for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) { // The consumer is not Host friendly, swap it back to the original device. - if (!internal::IsNodeInputPortHostFriendly(*fanout.node, - fanout.port_id)) { + if (!internal::IsNodeInputPortHostFriendly( + *fanout.node, fanout.port_id, + &op_device_inport_pinned_to_host_cache)) { node->set_device(device); break; } diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h index d557a03463..bed4a9ef95 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h @@ -26,8 +26,8 @@ namespace tensorflow { namespace grappler { namespace internal { // Try and find an appropriate Host device in `devices` given `device`. -string TryFindHostDevice(const gtl::FlatSet& devices, - bool has_device_cpu, const string& device); +bool TrySwapToHostDevice(const gtl::FlatSet& devices, + bool has_device_cpu, string* device); } // end namespace internal // Optimize TensorFlow ops that should be swapped into the CPU to avoid diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc index 7c64529441..9bb030b220 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc @@ -28,30 +28,60 @@ namespace { class PinToHostOptimizerTest : public GrapplerTest {}; -TEST_F(PinToHostOptimizerTest, TryFindHostDevice) { +TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceNoDevices) { gtl::FlatSet devices = {}; - EXPECT_EQ("ABC", internal::TryFindHostDevice(devices, false, "ABC")); - - devices = {"/device:CPU:0", "/device:XLA_GPU:0"}; - EXPECT_EQ(internal::TryFindHostDevice(devices, true, ""), "/device:CPU:0"); - EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:0"), - 
"/device:CPU:0"); - EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:*"), - "/device:CPU:0"); - - devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"}; - EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), ""); - EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"), - "/device:XLA_CPU:0"); - EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"), - "/device:XLA_CPU:0"); - - devices = {"/device:XLA_GPU:0"}; - EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), ""); - EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"), - "/device:XLA_GPU:0"); - EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"), - "/device:XLA_GPU:*"); + + string device = "ABC"; + EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); + EXPECT_EQ(device, "ABC"); +} + +TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceCpuXlaGpu) { + gtl::FlatSet devices = {"/device:CPU:0", "/device:XLA_GPU:0"}; + + string device = ""; + EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device)); + EXPECT_EQ(device, "/device:CPU:0"); + + device = "/device:XLA_GPU:0"; + EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device)); + EXPECT_EQ(device, "/device:CPU:0"); + + device = "/device:XLA_GPU:*"; + EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device)); + EXPECT_EQ(device, "/device:CPU:0"); +} + +TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaCpuXlaGpu) { + gtl::FlatSet devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"}; + + string device = ""; + EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); + EXPECT_TRUE(device.empty()); + + device = "/device:XLA_GPU:0"; + EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device)); + EXPECT_EQ(device, "/device:XLA_CPU:0"); + + device = "/device:XLA_GPU:*"; + EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device)); + EXPECT_EQ(device, "/device:XLA_CPU:0"); +} + 
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaGpu) { + gtl::FlatSet devices = {"/device:XLA_GPU:0"}; + + string device = ""; + EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); + EXPECT_TRUE(device.empty()); + + device = "/device:XLA_GPU:0"; + EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); + EXPECT_EQ(device, "/device:XLA_GPU:0"); + + device = "/device:XLA_GPU:*"; + EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); + EXPECT_EQ(device, "/device:XLA_GPU:*"); } TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) { -- GitLab From 411b9baa39636030181fdff15d2e985824b03d61 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Mon, 8 Oct 2018 09:42:50 -0700 Subject: [PATCH 054/411] Reduce tolerances for rmsprop_test float16, to fix OSS builds. PiperOrigin-RevId: 216200439 --- tensorflow/contrib/optimizer_v2/rmsprop_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py index 44301ffe9e..83f5971039 100644 --- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py +++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py @@ -157,8 +157,11 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType(rms1_np, rms1.eval()) self.assertAllCloseAccordingToType(mom0_np, mom0.eval()) self.assertAllCloseAccordingToType(mom1_np, mom1.eval()) - self.assertAllCloseAccordingToType(var0_np, var0.eval()) - self.assertAllCloseAccordingToType(var1_np, var1.eval()) + # TODO(b/117393988): Reduce tolerances for float16. 
+ self.assertAllCloseAccordingToType( + var0_np, var0.eval(), half_rtol=3e-3, half_atol=3e-3) + self.assertAllCloseAccordingToType( + var1_np, var1.eval(), half_rtol=3e-3, half_atol=3e-3) @parameterized.parameters([dtypes.float32, dtypes.float64]) def testMinimizeSparseResourceVariable(self, dtype): -- GitLab From f435e776216c7a86f619a17064fd6e1deee638b3 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 8 Oct 2018 09:49:38 -0700 Subject: [PATCH 055/411] Avoid adding spurious ops when colocating with resource variables. Prior to this change, tf.colocate_with(v) would insert spurious operations (a ReadVariableOp and an Identity) in the graph when v is a resource variable, and then colocate the operations within the block with those newly added, otherwise disconnected, operations. This commit avoids adding the unnecessary ReadVariableOp/Identity nodes and colocates operations within the block with the VarHandleOp. PiperOrigin-RevId: 216201638 --- .../python/parameter_server_strategy_test.py | 4 ++- tensorflow/python/framework/ops.py | 28 ++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py index 353d11a583..9c112e4f85 100644 --- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py +++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py @@ -262,7 +262,9 @@ class ParameterServerStrategyTestBase( h = f + 1.0 self.assertEqual( device_util.canonicalize(u.device), tower_variable_device) - self.assertEqual(device_util.canonicalize(x.device), h.device) + self.assertEqual( + device_util.canonicalize(x.device), + device_util.canonicalize(h.device)) return y_add, z_add, f y, z, f = d.call_for_each_tower(model_fn) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 8bb177939e..77c2bc930e 100644 --- 
a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4140,10 +4140,7 @@ class Graph(object): if op is None and not ignore_existing: raise ValueError("Trying to reset colocation (op is None) but " "ignore_existing is not True") - - if op is not None and not isinstance(op, Operation): - # We always want to colocate with the reference op. - op = internal_convert_to_tensor_or_indexed_slices(op, as_ref=True).op + op = _op_to_colocate_with(op) # By default, colocate_with resets the device function stack, # since colocate_with is typically used in specific internal @@ -6168,4 +6165,27 @@ def _operation_conversion_error(op, dtype=None, name=None, as_ref=False): name, as_ref)) +def _op_to_colocate_with(v): + """Operation object corresponding to v to use for colocation constraints.""" + if v is None: + return None + if isinstance(v, Operation): + return v + # We always want to colocate with the reference op. + # When 'v' is a ResourceVariable, the reference op is the handle creating op. + # + # What this should be is: + # if isinstance(v, ResourceVariable): + # return v.handle.op + # However, that would require a circular import dependency. + # As of October 2018, there were attempts underway to remove + # colocation constraints altogether. Assuming that will + # happen soon, perhaps this hack to work around the circular + # import dependency is acceptable. + if hasattr(v, "handle") and hasattr(v.handle, "op") and isinstance( + v.handle.op, Operation): + return v.handle.op + return internal_convert_to_tensor_or_indexed_slices(v, as_ref=True).op + + register_tensor_conversion_function(Operation, _operation_conversion_error) -- GitLab From 87315f41ced19136819cef56ef37636c52c474de Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 09:49:59 -0700 Subject: [PATCH 056/411] Remove Raises documentation on imperative_grads for ValueErrror not raised. 
PiperOrigin-RevId: 216201714 --- tensorflow/python/eager/imperative_grad.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index 5f5af4ab6c..5c35860e9d 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -51,11 +51,6 @@ def imperative_grad( Raises: RuntimeError: if something goes wrong. - ValueError: if there is no sequence of differentiable operations connecting - a source and any target Tensor. This can happen either if the target is - not computed based on the source, if the tracing was set up incorrectly, - or if only non-differentiable functions of the source were used in the - computation of target. """ return pywrap_tensorflow.TFE_Py_TapeGradient( tape._tape, # pylint: disable=protected-access -- GitLab From 07df147ab20c4a5329148e5fb5f7f6b187cb73a4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 09:50:08 -0700 Subject: [PATCH 057/411] Enable PinToHostOptimizer. 
PiperOrigin-RevId: 216201732 --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index c3d70a1fdf..3f33b16ba8 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -107,7 +107,8 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( MK_OPT("scoped_allocator", new ScopedAllocatorOptimizer(cfg_.scoped_allocator_optimization(), cfg_.scoped_allocator_opts())); - MK_OPT("small_op", new PinToHostOptimizer(cfg_.pin_to_host_optimization())); + MK_OPT("pin_to_host", + new PinToHostOptimizer(cfg_.pin_to_host_optimization())); return std::unique_ptr(); } @@ -139,7 +140,7 @@ Status MetaOptimizer::InitializeOptimizers( if (cfg_.remapping() != RewriterConfig::OFF) { optimizers->push_back(MakeUnique(cfg_.remapping())); } - if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) { + if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) { optimizers->push_back(MakeUnique()); } if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { @@ -527,7 +528,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || cfg.debug_stripper() == RewriterConfig::ON || cfg.scoped_allocator_optimization() == RewriterConfig::ON || - cfg.pin_to_host_optimization() == RewriterConfig::ON || + cfg.pin_to_host_optimization() != RewriterConfig::OFF || !cfg.optimizers().empty() || !cfg.custom_optimizers().empty(); } -- GitLab From da3abf6afeaf781b932bce9ccb6c17da911e49b6 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 8 Oct 2018 09:53:31 -0700 Subject: [PATCH 058/411] Benchmark for comparing original cond and cond_v2 performance. 
This benchmark creates many intermediates values, so we can make sure there's no performance overhead (it looks like there might be currently, or it might be from some other difference). It also runs in a defun and in legacy graph mode. Results from my machine: entry { name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v1_defun" iters: 500 wall_time: 1.25822591782 } entry { name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v2_defun" iters: 500 wall_time: 5.99376106262 } entry { name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v1_graph" iters: 500 wall_time: 2.05277585983 } entry { name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v2_graph" iters: 500 wall_time: 2.84808516502 } Clearly we have some work to do! I haven't looked into the time differences at all yet. PiperOrigin-RevId: 216202325 --- tensorflow/python/BUILD | 13 ++ .../python/ops/control_flow_ops_benchmark.py | 122 ++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 tensorflow/python/ops/control_flow_ops_benchmark.py diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index da3c56db92..822d596995 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -5196,6 +5196,19 @@ cuda_py_test( main = "ops/concat_benchmark.py", ) +cuda_py_test( + name = "control_flow_ops_benchmark", + srcs = ["ops/control_flow_ops_benchmark.py"], + additional_deps = [ + ":client_testlib", + ":constant_op", + ":control_flow_ops", + ":framework_ops", + "//tensorflow/python/eager:function", + ], + main = "ops/control_flow_ops_benchmark.py", +) + cuda_py_test( name = "conv2d_benchmark", size = "large", diff --git a/tensorflow/python/ops/control_flow_ops_benchmark.py b/tensorflow/python/ops/control_flow_ops_benchmark.py new file mode 100644 index 0000000000..9ba5ff2c0f --- /dev/null +++ b/tensorflow/python/ops/control_flow_ops_benchmark.py @@ -0,0 +1,122 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmark for control flow ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time + +from tensorflow.python.client import session +from tensorflow.python.eager import context +from tensorflow.python.eager import function +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.platform import test + + +class CondWithManyIntermediatesBenchmark(test.Benchmark): + """Checks the runtime performance of outputting all intermediates.""" + + NUM_INTERMEDIATES = 1000 + NUM_ITERS = 500 + NUM_WARM_UP_ITERS = 50 + + def _create_cond(self, x): + + def branch_fn(): + # Use a random value so the adds can't be constant folded. + return x + sum(random_ops.random_normal([]) + for _ in range(self.NUM_INTERMEDIATES)) + + # Use a dynamic predicate to make sure the cond isn't constant folded. 
+ return control_flow_ops.cond(math_ops.not_equal(x, -1), + branch_fn, lambda: 0.0) + + def _benchmark_defun(self): + """Benchmarks cond in a defun.""" + + @function.defun + def cond_fn(x): + return self._create_cond(x) + + # Warm up + for _ in range(self.NUM_WARM_UP_ITERS): + cond_fn(0.0) + + start_time = time.time() + + for _ in range(self.NUM_ITERS): + cond_fn(0.0) + + self.report_benchmark( + wall_time=time.time() - start_time, + iters=self.NUM_ITERS) + + def _benchmark_graph(self): + """Benchmarks cond in legacy graph mode.""" + with context.graph_mode(): + with ops.Graph().as_default(): + x = array_ops.placeholder(dtypes.float32) + cond_val = self._create_cond(x) + + with session.Session() as sess: + cond_fn = sess.make_callable(cond_val, [x]) + + # Warm up + for _ in range(self.NUM_WARM_UP_ITERS): + cond_fn(0.0) + + start_time = time.time() + + for _ in range(self.NUM_ITERS): + cond_fn(0.0) + + self.report_benchmark( + wall_time=time.time() - start_time, + iters=self.NUM_ITERS) + + def benchmark_cond_v1_defun(self): + old_val = control_flow_ops.ENABLE_COND_V2 + control_flow_ops.ENABLE_COND_V2 = False + self._benchmark_defun() + control_flow_ops.ENABLE_COND_V2 = old_val + + def benchmark_cond_v2_defun(self): + old_val = control_flow_ops.ENABLE_COND_V2 + control_flow_ops.ENABLE_COND_V2 = True + self._benchmark_defun() + control_flow_ops.ENABLE_COND_V2 = old_val + + def benchmark_cond_v1_graph(self): + old_val = control_flow_ops.ENABLE_COND_V2 + control_flow_ops.ENABLE_COND_V2 = False + self._benchmark_graph() + control_flow_ops.ENABLE_COND_V2 = old_val + + def benchmark_cond_v2_graph(self): + old_val = control_flow_ops.ENABLE_COND_V2 + control_flow_ops.ENABLE_COND_V2 = True + self._benchmark_graph() + control_flow_ops.ENABLE_COND_V2 = old_val + +if __name__ == "__main__": + ops.enable_eager_execution() + test.main() -- GitLab From 6dd826b856acf6b060379251bfd91a950ee2b0af Mon Sep 17 00:00:00 2001 From: Makoto Uchida Date: Mon, 8 Oct 2018 10:00:18 -0700 Subject: 
[PATCH 059/411] Fix typo PiperOrigin-RevId: 216203408 --- .../experimental/kernel_tests/reader_dataset_ops_test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py index fe0b3b5f3b..77df8310d4 100644 --- a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py +++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py @@ -64,7 +64,7 @@ class FixedLengthRecordDatasetTestBase(test_base.DatasetTestBase): class MakeBatchedFeaturesDatasetTestBase(test_base.DatasetTestBase): - """Base class for setting up and testing `make_batched_feature_dataset`.""" + """Base class for setting up and testing `make_batched_features_dataset`.""" def setUp(self): super(MakeBatchedFeaturesDatasetTestBase, self).setUp() -- GitLab From 0e1ba8886b6a333b1ed8ed7548c55041c34e9623 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 10:09:50 -0700 Subject: [PATCH 060/411] Fix compilation in unique_op when Eigen::Index != int64. PiperOrigin-RevId: 216205396 --- tensorflow/core/kernels/unique_op.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc index 3559baa18e..3bdcfc90b8 100644 --- a/tensorflow/core/kernels/unique_op.cc +++ b/tensorflow/core/kernels/unique_op.cc @@ -108,7 +108,7 @@ class UniqueOp : public OpKernel { std::unordered_map uniq; uniq.reserve(2 * N); - for (int64 i = 0, j = 0; i < N; ++i) { + for (Eigen::Index i = 0, j = 0; i < N; ++i) { auto it = uniq.insert(std::make_pair(Tin(i), j)); idx_vec(i) = it.first->second; if (it.second) { @@ -131,19 +131,20 @@ class UniqueOp : public OpKernel { // General implementation when unique is run over multiple elements. 
auto Tin = input.shaped(new_sizes); - auto hash_fn = [&Tin](const int64& key) { + auto hash_fn = [&Tin](const Eigen::Index& key) { size_t h = 0; - for (int64 i = 0; i < Tin.dimension(0); i++) { - for (int64 j = 0; j < Tin.dimension(2); j++) { + for (Eigen::Index i = 0; i < Tin.dimension(0); i++) { + for (Eigen::Index j = 0; j < Tin.dimension(2); j++) { h = Hash64Combine(h, hash{}(Tin(i, key, j))); } } return h; }; - auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) { - for (int64 i = 0; i < Tin.dimension(0); i++) { - for (int64 j = 0; j < Tin.dimension(2); j++) { + auto equal_to_fn = [&Tin](const Eigen::Index& lhs, + const Eigen::Index& rhs) { + for (Eigen::Index i = 0; i < Tin.dimension(0); i++) { + for (Eigen::Index j = 0; j < Tin.dimension(2); j++) { if (Tin(i, lhs, j) != Tin(i, rhs, j)) { return false; } -- GitLab From 0e42fd6d0a88b30ab57959f38c79bea19d745ec3 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 8 Oct 2018 10:14:58 -0700 Subject: [PATCH 061/411] [tf.data] Adding specialization for `MapDataset`, `ParallelMapDataset`, and `MapAndBatchDataset` whose user-provided functions have the property that each output argument take its value directly from an input argument (e.g. `lambda x, y: y, x`). This specialization can produce the result without having to schedule the function using the executor. 
PiperOrigin-RevId: 216206232 --- tensorflow/core/kernels/data/BUILD | 14 ++ tensorflow/core/kernels/data/dataset_utils.cc | 47 +++++ tensorflow/core/kernels/data/dataset_utils.h | 20 ++ .../core/kernels/data/dataset_utils_test.cc | 46 +++++ .../core/kernels/data/filter_dataset_op.cc | 162 ++++++--------- .../kernels/data/map_and_batch_dataset_op.cc | 187 +++++++++++------- .../core/kernels/data/map_dataset_op.cc | 62 ++++-- .../kernels/data/parallel_map_dataset_op.cc | 79 +++++--- .../kernels/data/parallel_map_iterator.cc | 17 +- .../core/kernels/data/parallel_map_iterator.h | 2 +- .../kernels/data/parse_example_dataset_op.cc | 2 +- .../kernel_tests/map_and_batch_test.py | 31 +++ .../kernel_tests/filter_dataset_op_test.py | 2 +- .../data/kernel_tests/map_dataset_op_test.py | 95 +++++++-- .../python/data/kernel_tests/test_base.py | 29 +++ 15 files changed, 565 insertions(+), 230 deletions(-) create mode 100644 tensorflow/core/kernels/data/dataset_utils_test.cc diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 451f8c1a6c..37c1c54786 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -45,6 +45,16 @@ cc_library( ], ) +tf_cc_test( + name = "dataset_utils_test", + srcs = ["dataset_utils_test.cc"], + deps = [ + ":dataset_utils", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "captured_function", srcs = ["captured_function.cc"], @@ -205,6 +215,7 @@ tf_kernel_library( deps = [ ":captured_function", ":dataset", + ":dataset_utils", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", @@ -232,6 +243,7 @@ tf_kernel_library( deps = [ ":captured_function", ":dataset", + ":dataset_utils", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", @@ -245,6 +257,7 @@ tf_kernel_library( deps = [ ":captured_function", ":dataset", + ":dataset_utils", 
"//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", @@ -285,6 +298,7 @@ tf_kernel_library( deps = [ ":captured_function", ":dataset", + ":dataset_utils", ":parallel_map_iterator", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc index e10833f525..a40f7f2146 100644 --- a/tensorflow/core/kernels/data/dataset_utils.cc +++ b/tensorflow/core/kernels/data/dataset_utils.cc @@ -15,10 +15,57 @@ limitations under the License. #include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/gtl/cleanup.h" namespace tensorflow { namespace data { +Status ComputeShortCircuitIndices(OpKernelContext* ctx, + const NameAttrList& func, + std::vector* indices) { + FunctionLibraryRuntime::Handle fn_handle; + TF_RETURN_IF_ERROR(ctx->function_library()->Instantiate( + func.name(), AttrSlice(&func.attr()), &fn_handle)); + auto cleanup = gtl::MakeCleanup([ctx, fn_handle]() { + Status s = ctx->function_library()->ReleaseHandle(fn_handle); + if (!s.ok()) { + LOG(WARNING) << "Failed to release handle: " << s.error_message(); + } + }); + + const FunctionBody* fn_body = + ctx->function_library()->GetFunctionBody(fn_handle); + indices->resize(fn_body->ret_nodes.size()); + for (size_t i = 0; i < fn_body->ret_nodes.size(); ++i) { + Node* ret_node = fn_body->ret_nodes[i]; + Node* ret_input_node; + TF_RETURN_IF_ERROR(ret_node->input_node(0, &ret_input_node)); + if (ret_input_node->def().op() == FunctionLibraryDefinition::kArgOp) { + TF_RETURN_IF_ERROR( + GetNodeAttr(ret_input_node->def(), "index", &((*indices)[i]))); + } else { + indices->clear(); + break; + } + } + return Status::OK(); +} + +std::vector ComputeMoveVector(const 
std::vector& indices) { + std::map last_use; + for (size_t i = 0; i < indices.size(); ++i) { + last_use[indices[i]] = i; + } + std::vector can_move; + can_move.resize(indices.size()); + for (size_t i = 0; i < indices.size(); ++i) { + can_move[i] = last_use[indices[i]] == i; + } + return can_move; +} + Status MakeIteratorFromInputElement( IteratorContext* ctx, const std::vector& input_element, int64 thread_index, CapturedFunction* captured_func, StringPiece prefix, diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 6ec1350cd4..d777062293 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -22,6 +22,26 @@ limitations under the License. namespace tensorflow { namespace data { +// This method is used to determine whether we can short-circuit the evaluation +// of the user-defined function `func`. Short-circuting is possible if every +// function output corresponds to one of its inputs (e.g. `f(x) = x`, `f(x,y) = +// (y,x)`, or `f(x) = (x,x)`). +// +// If short-circuiting is possible, the method stores the mapping from output +// indices to input indices in `indices`. Otherwise, `indices` will be empty. +// +// Returns non-ok status if analysis of the function fails. +// +// TODO(jsimsa): Extend this to support constants as well. +Status ComputeShortCircuitIndices(OpKernelContext* ctx, + const NameAttrList& func, + std::vector* indices); + +// Given a vector that maps output indices to input indices, return a vector +// that identifies for which output indices can we move the input (assuming +// output indices are processed left to right). 
+std::vector ComputeMoveVector(const std::vector& indices); + Status MakeIteratorFromInputElement( IteratorContext* ctx, const std::vector& input_element, int64 thread_index, CapturedFunction* captured_func, StringPiece prefix, diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc new file mode 100644 index 0000000000..43295b8ebb --- /dev/null +++ b/tensorflow/core/kernels/data/dataset_utils_test.cc @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/data/dataset_utils.h" + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace data { +namespace { + +TEST(DatasetUtils, ComputeMoveVector) { + struct TestCase { + std::vector indices; + std::vector expected; + }; + + TestCase test_cases[] = { + TestCase{{}, {}}, + TestCase{{1}, {true}}, + TestCase{{1, 1}, {false, true}}, + TestCase{{1, 2}, {true, true}}, + TestCase{{1, 1, 2}, {false, true, true}}, + TestCase{{1, 2, 2}, {true, false, true}}, + }; + + for (auto& test_case : test_cases) { + EXPECT_EQ(test_case.expected, ComputeMoveVector(test_case.indices)); + } +} + +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc index 00884314a9..be7d182a1f 100644 --- a/tensorflow/core/kernels/data/filter_dataset_op.cc +++ b/tensorflow/core/kernels/data/filter_dataset_op.cc @@ -18,9 +18,11 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -31,67 +33,84 @@ namespace { class FilterDatasetOp : public UnaryDatasetOpKernel { public: + using FilterIteratorPredicate = + std::function, bool*)>; + explicit FilterDatasetOp(OpKernelConstruction* ctx) - : UnaryDatasetOpKernel(ctx), - graph_def_version_(ctx->graph_def_version()) { + : UnaryDatasetOpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_)); } void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - FunctionLibraryRuntime::Handle pred_handle; - OP_REQUIRES_OK(ctx, - ctx->function_library()->Instantiate( - func_.name(), AttrSlice(&func_.attr()), &pred_handle)); - auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() { - OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle)); - }); - - const FunctionBody* pred_body = - ctx->function_library()->GetFunctionBody(pred_handle); - OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1, - errors::InvalidArgument( - "predicate function must have a single return value.")); - Node* ret_node = pred_body->ret_nodes[0]; - Node* ret_input_node; - OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node)); - std::unique_ptr captured_func; OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", &captured_func)); - if (ret_input_node->def().op() == "_Arg") { - int32 index = -1; - OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index)); - *output = new FilterTensorDataset(ctx, input, func_, - std::move(captured_func), index); + std::vector indices; + OP_REQUIRES_OK(ctx, 
ComputeShortCircuitIndices(ctx, func_, &indices)); + OP_REQUIRES(ctx, indices.size() <= 1, + errors::InvalidArgument( + "predicate function has more than one return value.")); + + FilterIteratorPredicate filter_pred; + if (indices.empty()) { + CapturedFunction* raw_captured_func = captured_func.get(); + filter_pred = [raw_captured_func](IteratorContext* ctx, + const std::vector& args, + bool* out_matched) { + std::vector result; + TF_RETURN_IF_ERROR( + raw_captured_func->RunWithBorrowedArgs(ctx, args, &result)); + + if (result.size() != 1 || result[0].dtype() != DT_BOOL || + result[0].NumElements() != 1) { + return errors::InvalidArgument( + "Filter predicate `f` must return a scalar bool."); + } + *out_matched = result[0].scalar()(); + return Status::OK(); + }; } else { - *output = new FilterFunctionDataset(ctx, input, func_, - std::move(captured_func)); + filter_pred = [indices](IteratorContext* ctx, + const std::vector& args, + bool* out_matched) { + const Tensor& predicate = args[indices[0]]; + if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) { + return errors::InvalidArgument( + "Filter predicate `f` must return a scalar bool."); + } + *out_matched = predicate.scalar()(); + return Status::OK(); + }; } + + *output = new Dataset(ctx, input, func_, std::move(captured_func), + std::move(filter_pred)); } private: - const int graph_def_version_; - - class FilterDatasetBase : public DatasetBase { + class Dataset : public DatasetBase { public: - FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input, - const NameAttrList& func, - std::unique_ptr captured_func) + Dataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, + std::unique_ptr captured_func, + FilterIteratorPredicate filter_pred) : DatasetBase(DatasetContext(ctx)), input_(input), func_(func), - captured_func_(std::move(captured_func)) { + captured_func_(std::move(captured_func)), + filter_pred_(std::move(filter_pred)) { input_->Ref(); } - 
~FilterDatasetBase() override { input_->Unref(); } + ~Dataset() override { input_->Unref(); } std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - return std::unique_ptr( - new Iterator({this, strings::StrCat(prefix, "::Filter")})); + return MakeUnique( + Iterator::Params{this, strings::StrCat(prefix, "::Filter")}, + filter_pred_); } const DataTypeVector& output_dtypes() const override { @@ -133,17 +152,15 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - virtual Status EvaluatePredicate(IteratorContext* ctx, - const std::vector& element, - bool* out_matched) const = 0; - private: - class Iterator : public DatasetIterator { + class Iterator : public DatasetIterator { public: - explicit Iterator(const Params& params) - : DatasetIterator(params), + explicit Iterator(const Params& params, + FilterIteratorPredicate filter_pred) + : DatasetIterator(params), filtered_elements_(0), - dropped_elements_(0) { + dropped_elements_(0), + filter_pred_(std::move(filter_pred)) { std::vector components = str_util::Split(params.prefix, "::", str_util::SkipEmpty()); prefix_end_ = components.back(); @@ -180,8 +197,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - TF_RETURN_IF_ERROR( - dataset()->EvaluatePredicate(ctx, *out_tensors, &matched)); + TF_RETURN_IF_ERROR(filter_pred_(ctx, *out_tensors, &matched)); if (!matched) { // Clear the output tensor list since it didn't match. 
out_tensors->clear(); @@ -251,64 +267,14 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr input_impl_ GUARDED_BY(mu_); int64 filtered_elements_ GUARDED_BY(mu_); int64 dropped_elements_ GUARDED_BY(mu_); + const FilterIteratorPredicate filter_pred_; string prefix_end_; }; const DatasetBase* const input_; const NameAttrList func_; - - protected: const std::unique_ptr captured_func_; - }; - - class FilterFunctionDataset : public FilterDatasetBase { - public: - using FilterDatasetBase::FilterDatasetBase; - - protected: - Status EvaluatePredicate(IteratorContext* ctx, - const std::vector& element, - bool* out_matched) const override { - // TODO(mrry): Avoid blocking a threadpool thread. We will need to - // stack-rip the iterators and use async kernels. - std::vector result; - TF_RETURN_IF_ERROR( - captured_func_->RunWithBorrowedArgs(ctx, element, &result)); - - if (result.size() != 1 || result[0].dtype() != DT_BOOL || - result[0].NumElements() != 1) { - return errors::InvalidArgument( - "Filter predicate `f` must return a scalar bool."); - } - *out_matched = result[0].scalar()(); - return Status::OK(); - } - }; - - class FilterTensorDataset : public FilterDatasetBase { - public: - FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input, - const NameAttrList& func, - std::unique_ptr captured_func, - int32 index) - : FilterDatasetBase(ctx, input, func, std::move(captured_func)), - index_(index) {} - - protected: - Status EvaluatePredicate(IteratorContext* ctx, - const std::vector& element, - bool* out_matched) const override { - const Tensor& predicate = element[index_]; - if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) { - return errors::InvalidArgument( - "Filter predicate `f` must return a scalar bool."); - } - *out_matched = predicate.scalar()(); - return Status::OK(); - } - - private: - const int32 index_; + const FilterIteratorPredicate filter_pred_; }; private: diff --git 
a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index bf08970560..f45a239793 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/inplace_ops_functor.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/gtl/cleanup.h" @@ -29,6 +30,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -41,6 +43,10 @@ namespace { // transformation more robust. class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { public: + using MapAndBatchIteratorFunction = + std::function, + std::shared_ptr>, StatusCallback)>; + explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx), op_version_(ctx->def().op() == "MapAndBatchDataset" ? 
1 : 2) { @@ -91,31 +97,73 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", &captured_func)); - *output = new Dataset(ctx, input, batch_size, num_parallel_calls, - drop_remainder, output_types_, output_shapes_, func_, - std::move(captured_func), &ctx->eigen_cpu_device()); + std::vector indices; + OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices)); + + MapAndBatchIteratorFunction map_func; + CapturedFunction* raw_captured_func = captured_func.get(); + if (indices.empty()) { + map_func = [raw_captured_func]( + IteratorContext* ctx, const string& prefix, + std::vector args, + std::shared_ptr> out_tensors, + StatusCallback done) { + raw_captured_func->RunAsync(ctx, std::move(args), out_tensors.get(), + std::move(done), prefix); + }; + } else { + std::vector can_move = ComputeMoveVector(indices); + map_func = [raw_captured_func, indices, can_move]( + IteratorContext* ctx, const string& prefix, + std::vector args, + std::shared_ptr> out_tensors, + StatusCallback done) { + const std::vector& captured_inputs = + raw_captured_func->captured_inputs(); + size_t num_args = args.size(); + for (size_t i = 0; i < indices.size(); ++i) { + if (indices[i] < num_args) { + if (can_move[i]) { + out_tensors->push_back(std::move(args[indices[i]])); + } else { + out_tensors->push_back(args[indices[i]]); + } + } else { + out_tensors->push_back(captured_inputs[indices[i] - num_args]); + } + } + done(Status::OK()); + }; + } + + *output = new Dataset(ctx, input, func_, batch_size, num_parallel_calls, + drop_remainder, output_types_, output_shapes_, + std::move(captured_func), &ctx->eigen_cpu_device(), + std::move(map_func)); } private: class Dataset : public DatasetBase { public: - Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size, + Dataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, int64 batch_size, int64 num_parallel_calls, bool 
drop_remainder, const DataTypeVector& output_types, const std::vector& output_shapes, - const NameAttrList& func, std::unique_ptr captured_func, - const Eigen::ThreadPoolDevice* device) + const Eigen::ThreadPoolDevice* device, + MapAndBatchIteratorFunction map_func) : DatasetBase(DatasetContext(ctx)), input_(input), + func_(func), batch_size_(batch_size), num_parallel_calls_(num_parallel_calls), drop_remainder_(drop_remainder), output_types_(output_types), output_shapes_(output_shapes), - map_fn_(func), captured_func_(std::move(captured_func)), - device_(device) { + device_(device), + map_func_(std::move(map_func)) { input_->Ref(); } @@ -123,8 +171,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - return std::unique_ptr( - new Iterator({this, strings::StrCat(prefix, "::MapAndBatch")})); + return MakeUnique( + Iterator::Params{this, strings::StrCat(prefix, "::MapAndBatch")}, + map_func_); } const DataTypeVector& output_dtypes() const override { @@ -143,7 +192,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { Status AsGraphDefInternal(SerializationContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { - TF_RETURN_IF_ERROR(b->AddFunction(ctx, map_fn_.name())); + TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name())); Node* input_graph_node = nullptr; TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); Node* batch_size_node; @@ -165,7 +214,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { other_arguments_types.emplace_back(t.dtype()); } AttrValue f; - b->BuildAttrValue(map_fn_, &f); + b->BuildAttrValue(func_, &f); AttrValue other_arguments_types_attr; b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); @@ -185,12 +234,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { private: class Iterator : public DatasetIterator { public: - explicit Iterator(const Params& params) + explicit 
Iterator(const Params& params, + MapAndBatchIteratorFunction map_func) : DatasetIterator(params), mu_(std::make_shared()), cond_var_(std::make_shared()), num_parallel_calls_(std::make_shared( - params.dataset->num_parallel_calls_, mu_, cond_var_)) {} + params.dataset->num_parallel_calls_, mu_, cond_var_)), + map_func_(std::move(map_func)) {} ~Iterator() override { mutex_lock l(*mu_); @@ -297,44 +348,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { int64 num_calls; // access guarded by owner's mutex }; - void Callback(const std::shared_ptr& ctx, - const std::shared_ptr& result, - const std::shared_ptr>& return_values, - int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) { - result->UpdateStatus(status); - if (status.ok()) { - EnsureOutputAllocated(ctx, result, return_values); - for (size_t i = 0; i < return_values->size(); ++i) { - const Tensor& tensor = return_values->at(i); - Tensor* batch = &(result->output)[i]; - if (tensor.NumElements() != - (batch->NumElements() / batch->dim_size(0))) { - TensorShape batch_shape = batch->shape(); - batch_shape.RemoveDim(0); - result->UpdateStatus(errors::InvalidArgument( - "Cannot add tensor to the batch: number of elements does not " - "match. Shapes are: [tensor]: ", - tensor.shape().DebugString(), - ", [batch]: ", batch_shape.DebugString())); - break; - } - // TODO(mrry): Add a version of DoParallelConcat that allows us to - // move `tensor` where possible, to speed up string tensor batching. 
- Status copy_status = ::tensorflow::functor::DoParallelConcat( - *dataset()->device_, tensor, offset, batch); - if (!copy_status.ok()) { - result->UpdateStatus(copy_status); - break; - } - } - { - mutex_lock l(result->mu); - result->num_elements++; - } - } - CallCompleted(result); - } - void CallCompleted(const std::shared_ptr& result) LOCKS_EXCLUDED(*mu_) { mutex_lock l(*mu_); @@ -363,21 +376,48 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { return; } - // Call `captured_func_(input_element)`, using `Callback` to store the - // result in `result`. - (*ctx->runner())(std::bind( - [this, result, offset](std::shared_ptr ctx, - std::vector input_element) { - std::shared_ptr> return_values( - new std::vector()); - dataset()->captured_func_->RunAsync( - ctx.get(), std::move(input_element), return_values.get(), - [this, ctx, result, return_values, offset](Status status) { - Callback(ctx, result, return_values, offset, status); - }, - prefix()); - }, - ctx, std::move(input_element))); + std::shared_ptr> return_values = + std::make_shared>(); + auto done = [this, ctx, result, return_values, offset](Status status) { + result->UpdateStatus(status); + if (status.ok()) { + EnsureOutputAllocated(ctx, result, return_values); + for (size_t i = 0; i < return_values->size(); ++i) { + const Tensor& tensor = return_values->at(i); + Tensor* batch = &(result->output)[i]; + if (tensor.NumElements() != + (batch->NumElements() / batch->dim_size(0))) { + TensorShape batch_shape = batch->shape(); + batch_shape.RemoveDim(0); + result->UpdateStatus(errors::InvalidArgument( + "Cannot add tensor to the batch: number of elements does " + "not match. Shapes are: [tensor]: ", + tensor.shape().DebugString(), + ", [batch]: ", batch_shape.DebugString())); + break; + } + // TODO(mrry): Add a version of DoParallelConcat that allows us to + // move `tensor` where possible, to speed up string tensor + // batching. 
+ Status copy_status = ::tensorflow::functor::DoParallelConcat( + *dataset()->device_, tensor, offset, batch); + if (!copy_status.ok()) { + result->UpdateStatus(copy_status); + break; + } + } + { + mutex_lock l(result->mu); + result->num_elements++; + } + } + CallCompleted(result); + }; + + // Apply the map function on `input_element`, storing the result in + // `return_values`, and invoking `done` when finished. + map_func_(ctx.get(), prefix(), std::move(input_element), + std::move(return_values), std::move(done)); } Status CopyPartialBatch(Tensor* output, const Tensor& value, @@ -404,7 +444,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { void EnsureRunnerThreadStarted(IteratorContext* ctx) EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { - std::shared_ptr ctx_copy(new IteratorContext(*ctx)); + auto ctx_copy = std::make_shared(*ctx); runner_thread_.reset(ctx->env()->StartThread( {}, "runner_thread", std::bind(&Iterator::RunnerThread, this, ctx_copy))); @@ -509,8 +549,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { while (!busy()) { if (call_counter_ % dataset()->batch_size_ == 0) { - batch_results_.emplace_back( - new BatchResult(dataset()->batch_size_)); + batch_results_.push_back( + std::make_shared(dataset()->batch_size_)); } int64 offset = call_counter_++ % dataset()->batch_size_; new_calls.emplace_back(batch_results_.back(), offset); @@ -527,7 +567,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader, size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) { - batch_results_.emplace_back(new BatchResult(dataset()->batch_size_)); + batch_results_.push_back( + std::make_shared(dataset()->batch_size_)); std::shared_ptr result = batch_results_.back(); string prefix = strings::StrCat("batch_results_", index); mutex_lock l(result->mu); @@ -653,6 +694,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { const std::shared_ptr cond_var_; // 
Identifies the maximum number of parallel calls. const std::shared_ptr num_parallel_calls_; + const MapAndBatchIteratorFunction map_func_; + // Counts the number of outstanding calls for this batch. int64 num_calls_ GUARDED_BY(*mu_) = 0; // Counts the total number of calls. @@ -671,9 +714,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { const bool drop_remainder_; const DataTypeVector output_types_; const std::vector output_shapes_; - const NameAttrList map_fn_; const std::unique_ptr captured_func_; const Eigen::ThreadPoolDevice* device_; // not owned + const MapAndBatchIteratorFunction map_func_; }; const int op_version_; diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc index f112e1dc43..6b6ffabf4f 100644 --- a/tensorflow/core/kernels/data/map_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_dataset_op.cc @@ -17,7 +17,9 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -28,6 +30,9 @@ namespace { class MapDatasetOp : public UnaryDatasetOpKernel { public: + using MapIteratorFunction = std::function, std::vector*)>; + explicit MapDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); @@ -43,8 +48,42 @@ class MapDatasetOp : public UnaryDatasetOpKernel { use_inter_op_parallelism_, &captured_func)); + std::vector indices; + OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices)); + + MapIteratorFunction map_func; + CapturedFunction* raw_captured_func = captured_func.get(); + if (indices.empty()) { + map_func = 
[raw_captured_func](IteratorContext* ctx, + std::vector args, + std::vector* out_tensors) { + return raw_captured_func->Run(ctx, std::move(args), out_tensors); + }; + } else { + std::vector can_move = ComputeMoveVector(indices); + map_func = [raw_captured_func, indices, can_move]( + IteratorContext* ctx, std::vector args, + std::vector* out_tensors) { + const std::vector& captured_inputs = + raw_captured_func->captured_inputs(); + size_t num_args = args.size(); + for (size_t i = 0; i < indices.size(); ++i) { + if (indices[i] < num_args) { + if (can_move[i]) { + out_tensors->push_back(std::move(args[indices[i]])); + } else { + out_tensors->push_back(args[indices[i]]); + } + } else { + out_tensors->push_back(captured_inputs[indices[i] - num_args]); + } + } + return Status::OK(); + }; + } + *output = new Dataset(ctx, input, func_, std::move(captured_func), - output_types_, output_shapes_); + output_types_, output_shapes_, std::move(map_func)); } private: @@ -54,13 +93,15 @@ class MapDatasetOp : public UnaryDatasetOpKernel { const NameAttrList& func, std::unique_ptr captured_func, const DataTypeVector& output_types, - const std::vector& output_shapes) + const std::vector& output_shapes, + MapIteratorFunction map_func) : DatasetBase(DatasetContext(ctx)), input_(input), func_(func), captured_func_(std::move(captured_func)), output_types_(output_types), - output_shapes_(output_shapes) { + output_shapes_(output_shapes), + map_func_(std::move(map_func)) { input_->Ref(); } @@ -68,8 +109,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - return std::unique_ptr( - new Iterator({this, strings::StrCat(prefix, "::Map")})); + return MakeUnique( + Iterator::Params{this, strings::StrCat(prefix, "::Map")}, map_func_); } const DataTypeVector& output_dtypes() const override { @@ -116,8 +157,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel { private: class Iterator : public DatasetIterator { public: 
- explicit Iterator(const Params& params) - : DatasetIterator(params) {} + explicit Iterator(const Params& params, MapIteratorFunction map_func) + : DatasetIterator(params), map_func_(std::move(map_func)) {} Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( @@ -139,10 +180,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - // TODO(mrry): Avoid blocking a threadpool thread. We will need to - // stack-rip the iterators and use async kernels. - Status s = - dataset()->captured_func_->Run(ctx, std::move(args), out_tensors); + Status s = map_func_(ctx, args, out_tensors); if (errors::IsOutOfRange(s)) { // `f` may deliberately raise `errors::OutOfRange` to indicate // that we should terminate the iteration early. @@ -167,6 +205,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { private: std::unique_ptr input_impl_; + const MapIteratorFunction map_func_; }; const DatasetBase* const input_; @@ -174,6 +213,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { const std::unique_ptr captured_func_; const DataTypeVector output_types_; const std::vector output_shapes_; + const MapIteratorFunction map_func_; }; DataTypeVector output_types_; diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index 6abe6c8338..3a14924fba 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/parallel_map_iterator.h" #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/lib/random/random.h" @@ -56,9 +57,55 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { use_inter_op_parallelism_, &captured_func)); + std::vector indices; + OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices)); + + ParallelMapIteratorFunction map_func; + CapturedFunction* raw_captured_func = captured_func.get(); + if (indices.empty()) { + map_func = [raw_captured_func](IteratorContext* ctx, const string& prefix, + std::vector args, + std::vector* out_tensors, + StatusCallback done) { + raw_captured_func->RunAsync(ctx, std::move(args), out_tensors, + std::move(done), prefix); + }; + if (!use_inter_op_parallelism_) { + map_func = [map_func](IteratorContext* ctx, const string& prefix, + std::vector args, + std::vector* out_tensors, + StatusCallback done) { + (*ctx->runner())(std::bind(map_func, ctx, prefix, std::move(args), + out_tensors, std::move(done))); + }; + } + } else { + std::vector can_move = ComputeMoveVector(indices); + map_func = [raw_captured_func, indices, can_move]( + IteratorContext* ctx, const string& prefix, + std::vector args, std::vector* out_tensors, + StatusCallback done) { + const std::vector& captured_inputs = + raw_captured_func->captured_inputs(); + size_t num_args = args.size(); + for (size_t i = 0; i < indices.size(); ++i) { + if (indices[i] < num_args) { + if (can_move[i]) { + out_tensors->push_back(std::move(args[indices[i]])); + } else { + out_tensors->push_back(args[indices[i]]); + } + } else { + out_tensors->push_back(captured_inputs[indices[i] - num_args]); + } + } + done(Status::OK()); + }; + } + *output = new Dataset(ctx, input, func_, num_parallel_calls, 
output_types_, output_shapes_, use_inter_op_parallelism_, - std::move(captured_func)); + std::move(captured_func), std::move(map_func)); } private: @@ -69,7 +116,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { const DataTypeVector& output_types, const std::vector& output_shapes, bool use_inter_op_parallelism, - std::unique_ptr captured_func) + std::unique_ptr captured_func, + ParallelMapIteratorFunction map_func) : DatasetBase(DatasetContext(ctx)), input_(input), func_(func), @@ -77,7 +125,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { output_types_(output_types), output_shapes_(output_shapes), use_inter_op_parallelism_(use_inter_op_parallelism), - captured_func_(std::move(captured_func)) { + captured_func_(std::move(captured_func)), + map_func_(std::move(map_func)) { input_->Ref(); } @@ -89,26 +138,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { return captured_func_->Instantiate(ctx); }; - const string& new_prefix = strings::StrCat(prefix, "::ParallelMap"); - ParallelMapIteratorFunction map_func = - [this, new_prefix](IteratorContext* ctx, - std::vector input_element, - std::vector* result, StatusCallback done) { - captured_func_->RunAsync(ctx, std::move(input_element), result, - std::move(done), new_prefix); - }; - if (!use_inter_op_parallelism_) { - map_func = [map_func]( - IteratorContext* ctx, std::vector input_element, - std::vector* result, StatusCallback done) { - (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element), - result, std::move(done))); - }; - } - - return NewParallelMapIterator({this, new_prefix}, input_, - std::move(init_func), std::move(map_func), - num_parallel_calls_); + return NewParallelMapIterator( + {this, strings::StrCat(prefix, "::ParallelMap")}, input_, + std::move(init_func), map_func_, num_parallel_calls_); } const DataTypeVector& output_dtypes() const override { @@ -176,6 +208,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { const std::vector 
output_shapes_; const bool use_inter_op_parallelism_; const std::unique_ptr captured_func_; + const ParallelMapIteratorFunction map_func_; }; DataTypeVector output_types_; diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc index 13bd4b6036..ebf41925c9 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.cc +++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -179,7 +180,7 @@ class ParallelMapIterator : public DatasetBaseIterator { void EnsureRunnerThreadStarted(IteratorContext* ctx) EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { - std::shared_ptr ctx_copy(new IteratorContext(*ctx)); + auto ctx_copy = std::make_shared(*ctx); runner_thread_.reset(ctx->env()->StartThread( {}, "runner_thread", std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy))); @@ -208,15 +209,15 @@ class ParallelMapIterator : public DatasetBaseIterator { return; } - // Call `func_(input_element)`, store the result in `result->return_values`, - // and notify `result->notification` to unblock a consumer. auto done = [this, result](Status status) { result->status.Update(status); CallCompleted(result); }; - map_func_(ctx.get(), std::move(input_element), &result->return_values, - std::move(done)); + // Apply the map function on `input_element`, storing the result in + // `result->return_values`, and invoking `done` when finished. 
+ map_func_(ctx.get(), prefix(), std::move(input_element), + &result->return_values, std::move(done)); } Status ProcessResult(const std::shared_ptr& result, @@ -349,9 +350,9 @@ std::unique_ptr NewParallelMapIterator( const DatasetBase* input_dataset, std::function init_func, ParallelMapIteratorFunction map_func, int32 num_parallel_calls) { - return std::unique_ptr( - new ParallelMapIterator(params, input_dataset, std::move(init_func), - std::move(map_func), num_parallel_calls)); + return MakeUnique( + params, input_dataset, std::move(init_func), std::move(map_func), + num_parallel_calls); } } // namespace data diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h index dc26c5cf25..813f13c9e4 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.h +++ b/tensorflow/core/kernels/data/parallel_map_iterator.h @@ -30,7 +30,7 @@ namespace data { // 3. A `std::vector*` to which the function will write the result. // 4. A `StatusCallback` that should be invoked when the function is complete. 
using ParallelMapIteratorFunction = - std::function, + std::function, std::vector*, StatusCallback)>; // Returns a new iterator that applies `map_func` to the elements of diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc index 1d1a717062..7de5ea8860 100644 --- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc +++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc @@ -182,7 +182,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - auto map_fn = [this](IteratorContext* ctx, + auto map_fn = [this](IteratorContext* ctx, const string& prefix, std::vector input_element, std::vector* result, StatusCallback done) { (*ctx->runner())([this, ctx, input_element, result, done]() { diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py index afd0fc3abf..d444c4082e 100644 --- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py @@ -332,6 +332,37 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): for _ in range(10): self.assertAllEqual([element for _ in range(10)], sess.run(get_next)) + @parameterized.named_parameters( + ("Identity", None, lambda x: x, None), + ("Replicate", None, lambda x: (x, x), None), + ("Swap", (None, None), lambda x, y: (y, x), None), + ("Project", (None, None), lambda x, y: x, None), + ) + def testShortCircuit(self, structure, map_fn, num_parallel_calls): + dataset = self.structuredDataset(structure).repeat().apply( + batching.map_and_batch(map_fn, batch_size=10)) + get_next = dataset.make_one_shot_iterator().get_next() + + with self.cached_session() as sess: + if isinstance(structure, tuple): + expected = map_fn( + 
*sess.run(self.structuredElement(structure, shape=[10]))) + else: + expected = map_fn( + sess.run(self.structuredElement(structure, shape=[10]))) + self.assertAllEqual(expected, sess.run(get_next)) + + def testShortCircuitCapturedInput(self): + captured_t = array_ops.placeholder(dtypes.int64, shape=[]) + dataset = self.structuredDataset(None).repeat().apply( + batching.map_and_batch(lambda x: captured_t, batch_size=10)) + iterator = dataset.make_initializable_iterator() + get_next = iterator.get_next() + + with self.cached_session() as sess: + sess.run(iterator.initializer, feed_dict={captured_t: 42}) + self.assertAllEqual([42] * 10, sess.run(get_next)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py index 6b7afafa5d..a0c6b37a6d 100644 --- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py @@ -156,7 +156,7 @@ class FilterDatasetTest(test_base.DatasetTestBase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testReturnComponent(self): + def testShortCircuit(self): iterator = ( dataset_ops.Dataset.zip( (dataset_ops.Dataset.range(10), diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py index 0c372ebb10..4683b1db91 100644 --- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py @@ -622,7 +622,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): sess.run(init_op) for i in range(10): actual = sess.run(get_next) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertIsInstance(actual, sparse_tensor.SparseTensorValue) self.assertSparseValuesEqual(actual, _sparse(i)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ 
-649,7 +649,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): sess.run(init_op) for i in range(10): actual = sess.run(get_next) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertIsInstance(actual, sparse_tensor.SparseTensorValue) self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval()) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -783,19 +783,72 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertTrue(all(tids[0] == tid for tid in tids)) # pylint: enable=g-long-lambda + @parameterized.named_parameters( + ("SequentialIdentity", None, lambda x: x, None), + ("SequentialReplicate", None, lambda x: (x, x), None), + ("SequentialSwap", (None, None), lambda x, y: (y, x), None), + ("SequentialProject", (None, None), lambda x, y: x, None), + ("ParallelIdentity", None, lambda x: x, 10), + ("ParallelReplicate", None, lambda x: (x, x), 10), + ("ParallelSwap", (None, None), lambda x, y: (y, x), 10), + ("ParallelProject", (None, None), lambda x, y: x, 10), + ) + def testShortCircuit(self, structure, map_fn, num_parallel_calls): + dataset = self.structuredDataset(structure).repeat().map( + map_fn, num_parallel_calls=num_parallel_calls) + get_next = dataset.make_one_shot_iterator().get_next() + + with self.cached_session() as sess: + if isinstance(structure, tuple): + expected = map_fn(*sess.run(self.structuredElement(structure))) + else: + expected = map_fn(sess.run(self.structuredElement(structure))) + self.assertEqual(expected, sess.run(get_next)) + + @parameterized.named_parameters( + ("Sequential", None), + ("Parallel", 10), + ) + def testShortCircuitCapturedInput(self, num_parallel_calls): + captured_t = array_ops.placeholder(dtypes.int64, shape=[]) + dataset = self.structuredDataset(None).repeat().map( + lambda x: captured_t, num_parallel_calls=num_parallel_calls) + iterator = dataset.make_initializable_iterator() + get_next = iterator.get_next() + 
+ with self.cached_session() as sess: + sess.run(iterator.initializer, feed_dict={captured_t: 42}) + self.assertEqual(42, sess.run(get_next)) + class MapDatasetBenchmark(test.Benchmark): def benchmarkChainOfMaps(self): chain_lengths = [0, 1, 2, 5, 10, 20, 50] for chain_length in chain_lengths: - for use_inter_op_parallelism in [False, True]: + for mode in ["general", "single-threaded", "short-circuit"]: + if mode == "general": + map_fn = lambda x: x + 1 + use_inter_op_parallelism = True + print_label = "" + benchmark_label = "" + if mode == "single-threaded": + map_fn = lambda x: x + 1 + use_inter_op_parallelism = False + print_label = " (single threaded mode)" + benchmark_label = "_single_threaded" + if mode == "short-circuit": + map_fn = lambda x: x + use_inter_op_parallelism = True # should not have any significance + print_label = " (short circuit mode)" + benchmark_label = "_short_circuit" + with ops.Graph().as_default(): dataset = dataset_ops.Dataset.from_tensors(0).repeat(None) for _ in range(chain_length): dataset = dataset_ops.MapDataset( dataset, - lambda x: x, + map_fn, use_inter_op_parallelism=use_inter_op_parallelism) iterator = dataset.make_one_shot_iterator() next_element = iterator.get_next() @@ -813,25 +866,39 @@ class MapDatasetBenchmark(test.Benchmark): median_wall_time = np.median(deltas) / 100 print("Map dataset chain length%s: %d Median wall time: %f" % - (" (single threaded mode)" if not use_inter_op_parallelism - else "", chain_length, median_wall_time)) + (print_label, chain_length, median_wall_time)) self.report_benchmark( iters=1000, wall_time=median_wall_time, name="benchmark_map_dataset_chain_latency_%d%s" % - (chain_length, "_single_threaded" - if not use_inter_op_parallelism else "")) + (chain_length, benchmark_label)) def benchmarkMapFanOut(self): fan_outs = [1, 2, 5, 10, 20, 50, 100] for fan_out in fan_outs: - for use_inter_op_parallelism in [False, True]: + for mode in ["general", "single-threaded", "short-circuit"]: + if mode == 
"general": + map_fn = lambda *xs: [x + 1 for x in xs] + use_inter_op_parallelism = True + print_label = "" + benchmark_label = "" + if mode == "single-threaded": + map_fn = lambda *xs: [x + 1 for x in xs] + use_inter_op_parallelism = False + print_label = " (single threaded mode)" + benchmark_label = "_single_threaded" + if mode == "short-circuit": + map_fn = lambda *xs: xs + use_inter_op_parallelism = True # should not have any significance + print_label = " (short circuit mode)" + benchmark_label = "_short_circuit" + with ops.Graph().as_default(): dataset = dataset_ops.Dataset.from_tensors( tuple(0 for _ in range(fan_out))).repeat(None) dataset = dataset_ops.MapDataset( dataset, - lambda *xs: xs, + map_fn, use_inter_op_parallelism=use_inter_op_parallelism) iterator = dataset.make_one_shot_iterator() next_element = iterator.get_next() @@ -849,14 +916,12 @@ class MapDatasetBenchmark(test.Benchmark): median_wall_time = np.median(deltas) / 100 print("Map dataset fan out%s: %d Median wall time: %f" % - (" (single threaded mode)" if not use_inter_op_parallelism - else "", fan_out, median_wall_time)) + (print_label, fan_out, median_wall_time)) self.report_benchmark( iters=1000, wall_time=median_wall_time, - name="benchmark_map_dataset_fan_out_%d%s" % - (fan_out, "_single_threaded" - if not use_inter_op_parallelism else "")) + name="benchmark_map_dataset_fan_out_%d%s" % (fan_out, + benchmark_label)) if __name__ == "__main__": diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py index b730e10949..b73a94e683 100644 --- a/tensorflow/python/data/kernel_tests/test_base.py +++ b/tensorflow/python/data/kernel_tests/test_base.py @@ -19,10 +19,13 @@ from __future__ import print_function import re +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes from tensorflow.python.framework import 
errors from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -107,3 +110,29 @@ class DatasetTestBase(test.TestCase): with self.assertRaisesRegexp(exception_class, re.escape(expected_message)): self.evaluate(next2()) + + def structuredDataset(self, structure, shape=None, dtype=dtypes.int64): + """Returns a singleton dataset with the given structure.""" + if shape is None: + shape = [] + if structure is None: + return dataset_ops.Dataset.from_tensors( + array_ops.zeros(shape, dtype=dtype)) + else: + return dataset_ops.Dataset.zip( + tuple([ + self.structuredDataset(substructure, shape, dtype) + for substructure in structure + ])) + + def structuredElement(self, structure, shape=None, dtype=dtypes.int64): + """Returns an element with the given structure.""" + if shape is None: + shape = [] + if structure is None: + return array_ops.zeros(shape, dtype=dtype) + else: + return tuple([ + self.structuredElement(substructure, shape, dtype) + for substructure in structure + ]) -- GitLab From a04cd08ee7a8c5245d76a59849e1f7e8ba8a3f52 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 8 Oct 2018 10:20:52 -0700 Subject: [PATCH 062/411] Allow TensorSpec objects as arguments to defun's get_concrete_function Will be helpful for specifying serving signatures when exporting SavedModels PiperOrigin-RevId: 216207284 --- tensorflow/python/eager/function.py | 24 +++++---------- tensorflow/python/eager/function_test.py | 37 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index bafe07de2b..93168826b1 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -855,20 +855,12 @@ class Function(object): return ret -def _get_defun_inputs_from_signature(signature): - """Maps a signature to graph-construction inputs.""" - function_inputs = [ - 
graph_placeholder(spec.dtype, spec.shape) - for spec in nest.flatten(signature) - ] - return nest.pack_sequence_as(signature, function_inputs) - - def _get_defun_inputs_from_args(args): """Maps python function args to graph-construction inputs.""" function_inputs = [ graph_placeholder(arg.dtype, arg.shape) - if isinstance(arg, ops.Tensor) else arg for arg in nest.flatten(args) + if isinstance(arg, (ops.Tensor, tensor_spec.TensorSpec)) + else arg for arg in nest.flatten(args) ] return nest.pack_sequence_as(args, function_inputs) @@ -912,12 +904,12 @@ def func_graph_from_py_func(name, with func_graph.as_default(), AutomaticControlDependencies() as a: variable_scope.get_variable_scope().set_use_resource(True) - if signature is None: - func_args = _get_defun_inputs_from_args(args) - func_kwargs = _get_defun_inputs_from_args(kwargs) - else: - func_args = _get_defun_inputs_from_signature(signature) - func_kwargs = {} + if signature is not None: + args = signature + kwargs = {} + + func_args = _get_defun_inputs_from_args(args) + func_kwargs = _get_defun_inputs_from_args(kwargs) # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`. 
# Variables to help check whether mutation happens in calling the function diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index a2cfb4b476..57e545be69 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -172,6 +172,43 @@ class FunctionTest(test.TestCase): out = sq_op(t) self.assertAllEqual(out, math_ops.matmul(t, t).numpy()) + def testInputSpecGraphFunction(self): + matmul = function.defun(math_ops.matmul) + + @function.defun + def sq(a): + return matmul(a, a) + + sq_op = sq.get_concrete_function( + tensor_spec.TensorSpec((None, None), dtypes.float32)) + self.assertEqual([None, None], sq_op.output_shapes.as_list()) + + t1 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) + out1 = sq_op(t1) + self.assertAllEqual(out1, math_ops.matmul(t1, t1).numpy()) + + t2 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) + out2 = sq_op(t2) + self.assertAllEqual(out2, math_ops.matmul(t2, t2).numpy()) + + def testNestedInputSpecGraphFunction(self): + matmul = function.defun(math_ops.matmul) + + @function.defun + def sq(mats): + ((a, b),) = mats + return matmul(a, b) + + sq_op = sq.get_concrete_function( + [(tensor_spec.TensorSpec((None, None), dtypes.float32), + tensor_spec.TensorSpec((None, None), dtypes.float32))]) + self.assertEqual([None, None], sq_op.output_shapes.as_list()) + + t1 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) + t2 = constant_op.constant([[1.4, 2.4], [3.4, 4.4]]) + out = sq_op(t1, t2) # Flattened structure for inputs to the graph function + self.assertAllEqual(out, math_ops.matmul(t1, t2).numpy()) + def testExecutingStatelessDefunConcurrently(self): @function.defun -- GitLab From 049d98c84ca7474459175914ca49c1fa3c11581d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 10:28:59 -0700 Subject: [PATCH 063/411] Wait for shared resources to initialize before initializing local resources. 
shared resources are very similar to global variables functionally and they are initialized at the same time but since workers are only waiting for global variables being initialized, there is a race condition that sometimes the shared resource is not ready. PiperOrigin-RevId: 216208679 --- tensorflow/python/training/monitored_session.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 82f0e3be52..a479f38165 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -195,8 +195,12 @@ class Scaffold(object): default_ready_op) if self._ready_for_local_init_op is None: def default_ready_for_local_init_op(): - return variables.report_uninitialized_variables( - variables.global_variables()) + return array_ops.concat([ + variables.report_uninitialized_variables( + variables.global_variables()), + resources.report_uninitialized_resources( + resources.shared_resources()) + ], 0) self._ready_for_local_init_op = Scaffold.get_or_default( 'ready_for_local_init_op', ops.GraphKeys.READY_FOR_LOCAL_INIT_OP, default_ready_for_local_init_op) -- GitLab From 153decedefc8da1fbd0717f4223b4b053e7aa517 Mon Sep 17 00:00:00 2001 From: Karmel Allison Date: Mon, 8 Oct 2018 10:36:38 -0700 Subject: [PATCH 064/411] Add support for SequenceExamples to sequence_feature_columns PiperOrigin-RevId: 216210141 --- .../contrib/estimator/python/estimator/rnn.py | 54 +- tensorflow/contrib/feature_column/BUILD | 21 + .../feature_column/sequence_feature_column.py | 72 +- ...equence_feature_column_integration_test.py | 280 ++++++ .../sequence_feature_column_test.py | 912 ++++++++++++------ .../python/feature_column/feature_column.py | 53 +- tensorflow/python/ops/parsing_ops.py | 13 +- 7 files changed, 1018 insertions(+), 387 deletions(-) create mode 100644 
tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py index 98660bb731..c595f47395 100644 --- a/tensorflow/contrib/estimator/python/estimator/rnn.py +++ b/tensorflow/contrib/estimator/python/estimator/rnn.py @@ -30,7 +30,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.layers import core as core_layers from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables @@ -92,55 +91,6 @@ def _make_rnn_cell_fn(num_units, cell_type='basic_rnn'): return rnn_cell_fn -def _concatenate_context_input(sequence_input, context_input): - """Replicates `context_input` across all timesteps of `sequence_input`. - - Expands dimension 1 of `context_input` then tiles it `sequence_length` times. - This value is appended to `sequence_input` on dimension 2 and the result is - returned. - - Args: - sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size, - padded_length, d0]`. - context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`. - - Returns: - A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, - d0 + d1]`. - - Raises: - ValueError: If `sequence_input` does not have rank 3 or `context_input` does - not have rank 2. 
- """ - seq_rank_check = check_ops.assert_rank( - sequence_input, - 3, - message='sequence_input must have rank 3', - data=[array_ops.shape(sequence_input)]) - seq_type_check = check_ops.assert_type( - sequence_input, - dtypes.float32, - message='sequence_input must have dtype float32; got {}.'.format( - sequence_input.dtype)) - ctx_rank_check = check_ops.assert_rank( - context_input, - 2, - message='context_input must have rank 2', - data=[array_ops.shape(context_input)]) - ctx_type_check = check_ops.assert_type( - context_input, - dtypes.float32, - message='context_input must have dtype float32; got {}.'.format( - context_input.dtype)) - with ops.control_dependencies( - [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): - padded_length = array_ops.shape(sequence_input)[1] - tiled_context_input = array_ops.tile( - array_ops.expand_dims(context_input, 1), - array_ops.concat([[1], [padded_length], [1]], 0)) - return array_ops.concat([sequence_input, tiled_context_input], 2) - - def _select_last_activations(activations, sequence_lengths): """Selects the nth set of activations for each n in `sequence_length`. @@ -222,8 +172,8 @@ def _rnn_logit_fn_builder(output_units, rnn_cell_fn, sequence_feature_columns, context_input = feature_column_lib.input_layer( features=features, feature_columns=context_feature_columns) - sequence_input = _concatenate_context_input(sequence_input, - context_input) + sequence_input = seq_fc.concatenate_context_input( + context_input, sequence_input) cell = rnn_cell_fn(mode) # Ignore output state. 
diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index aab7d0c9e8..a926ffd598 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -27,6 +27,7 @@ py_library( "//tensorflow/python:check_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", "//tensorflow/python:sparse_ops", "//tensorflow/python:tensor_shape", @@ -46,9 +47,29 @@ py_test( "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", "//tensorflow/python:sparse_tensor", "//tensorflow/python:training", "//tensorflow/python/feature_column", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", + ], +) + +py_test( + name = "sequence_feature_column_integration_test", + srcs = ["python/feature_column/sequence_feature_column_integration_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":sequence_feature_column", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:training", + "//tensorflow/python:util", + "//tensorflow/python/feature_column", + "//tensorflow/python/keras:layers", ], ) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 05bcdac2ca..dd6da35ed0 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -33,7 +33,6 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope # pylint: disable=protected-access -# TODO(b/73827486): Support SequenceExample. 
def sequence_input_layer( @@ -110,6 +109,7 @@ def sequence_input_layer( output_tensors = [] sequence_lengths = [] ordered_columns = [] + for column in sorted(feature_columns, key=lambda x: x.name): ordered_columns.append(column) with variable_scope.variable_scope( @@ -121,17 +121,67 @@ def sequence_input_layer( # Flattens the final dimension to produce a 3D Tensor. num_elements = column._variable_shape.num_elements() shape = array_ops.shape(dense_tensor) + target_shape = [shape[0], shape[1], num_elements] output_tensors.append( - array_ops.reshape( - dense_tensor, - shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) + array_ops.reshape(dense_tensor, shape=target_shape)) sequence_lengths.append(sequence_length) + fc._verify_static_batch_size_equality(output_tensors, ordered_columns) fc._verify_static_batch_size_equality(sequence_lengths, ordered_columns) sequence_length = _assert_all_equal_and_return(sequence_lengths) + return array_ops.concat(output_tensors, -1), sequence_length +def concatenate_context_input(context_input, sequence_input): + """Replicates `context_input` across all timesteps of `sequence_input`. + + Expands dimension 1 of `context_input` then tiles it `sequence_length` times. + This value is appended to `sequence_input` on dimension 2 and the result is + returned. + + Args: + context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`. + sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size, + padded_length, d0]`. + + Returns: + A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, + d0 + d1]`. + + Raises: + ValueError: If `sequence_input` does not have rank 3 or `context_input` does + not have rank 2. 
+ """ + seq_rank_check = check_ops.assert_rank( + sequence_input, + 3, + message='sequence_input must have rank 3', + data=[array_ops.shape(sequence_input)]) + seq_type_check = check_ops.assert_type( + sequence_input, + dtypes.float32, + message='sequence_input must have dtype float32; got {}.'.format( + sequence_input.dtype)) + ctx_rank_check = check_ops.assert_rank( + context_input, + 2, + message='context_input must have rank 2', + data=[array_ops.shape(context_input)]) + ctx_type_check = check_ops.assert_type( + context_input, + dtypes.float32, + message='context_input must have dtype float32; got {}.'.format( + context_input.dtype)) + with ops.control_dependencies( + [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): + padded_length = array_ops.shape(sequence_input)[1] + tiled_context_input = array_ops.tile( + array_ops.expand_dims(context_input, 1), + array_ops.concat([[1], [padded_length], [1]], 0)) + return array_ops.concat([sequence_input, tiled_context_input], 2) + + def sequence_categorical_column_with_identity( key, num_buckets, default_value=None): """Returns a feature column that represents sequences of integers. @@ -453,9 +503,17 @@ class _SequenceNumericColumn( [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], axis=0) dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) - sequence_length = fc._sequence_length_from_sparse_tensor( - sp_tensor, num_elements=self._variable_shape.num_elements()) + + # Get the number of timesteps per example + # For the 2D case, the raw values are grouped according to num_elements; + # for the 3D case, the grouping happens in the third dimension, and + # sequence length is not affected. 
+ num_elements = (self._variable_shape.num_elements() + if sp_tensor.shape.ndims == 2 else 1) + seq_length = fc._sequence_length_from_sparse_tensor( + sp_tensor, num_elements=num_elements) + return fc._SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=seq_length) # pylint: enable=protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py new file mode 100644 index 0000000000..d8ca363627 --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py @@ -0,0 +1,280 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Integration test for sequence feature columns with SequenceExamples.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import string +import tempfile + +from google.protobuf import text_format + +from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.keras.layers import recurrent +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.util import compat + + +class SequenceFeatureColumnIntegrationTest(test.TestCase): + + def _make_sequence_example(self): + example = example_pb2.SequenceExample() + example.context.feature['int_ctx'].int64_list.value.extend([5]) + example.context.feature['float_ctx'].float_list.value.extend([123.6]) + for val in range(0, 10, 2): + feat = feature_pb2.Feature() + feat.int64_list.value.extend([val] * val) + example.feature_lists.feature_list['int_list'].feature.extend([feat]) + for val in range(1, 11, 2): + feat = feature_pb2.Feature() + feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val) + example.feature_lists.feature_list['str_list'].feature.extend([feat]) + + return example + + def _build_feature_columns(self): + col = fc.categorical_column_with_identity( + 'int_ctx', num_buckets=100) + ctx_cols = [ + fc.embedding_column(col, dimension=10), + fc.numeric_column('float_ctx')] + + identity_col = sfc.sequence_categorical_column_with_identity( + 'int_list', num_buckets=10) + bucket_col = sfc.sequence_categorical_column_with_hash_bucket( + 'bytes_list', hash_bucket_size=100) + seq_cols = [ + 
fc.embedding_column(identity_col, dimension=10), + fc.embedding_column(bucket_col, dimension=20)] + + return ctx_cols, seq_cols + + def test_sequence_example_into_input_layer(self): + examples = [_make_sequence_example().SerializeToString()] * 100 + ctx_cols, seq_cols = self._build_feature_columns() + + def _parse_example(example): + ctx, seq = parsing_ops.parse_single_sequence_example( + example, + context_features=fc.make_parse_example_spec(ctx_cols), + sequence_features=fc.make_parse_example_spec(seq_cols)) + ctx.update(seq) + return ctx + + ds = dataset_ops.Dataset.from_tensor_slices(examples) + ds = ds.map(_parse_example) + ds = ds.batch(20) + + # Test on a single batch + features = ds.make_one_shot_iterator().get_next() + + # Tile the context features across the sequence features + seq_layer, _ = sfc.sequence_input_layer(features, seq_cols) + ctx_layer = fc.input_layer(features, ctx_cols) + input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer) + + rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10)) + output = rnn_layer(input_layer) + + with self.cached_session() as sess: + sess.run(variables.global_variables_initializer()) + features_r = sess.run(features) + self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6]) + + output_r = sess.run(output) + self.assertAllEqual(output_r.shape, [20, 10]) + + +class SequenceExampleParsingTest(test.TestCase): + + def test_seq_ex_in_sequence_categorical_column_with_identity(self): + self._test_parsed_sequence_example( + 'int_list', sfc.sequence_categorical_column_with_identity, + 10, [3, 6], [2, 4, 6]) + + def test_seq_ex_in_sequence_categorical_column_with_hash_bucket(self): + self._test_parsed_sequence_example( + 'bytes_list', sfc.sequence_categorical_column_with_hash_bucket, + 10, [3, 4], [compat.as_bytes(x) for x in 'acg']) + + def test_seq_ex_in_sequence_categorical_column_with_vocabulary_list(self): + self._test_parsed_sequence_example( + 'bytes_list', 
sfc.sequence_categorical_column_with_vocabulary_list, + list(string.ascii_lowercase), [3, 4], + [compat.as_bytes(x) for x in 'acg']) + + def test_seq_ex_in_sequence_categorical_column_with_vocabulary_file(self): + _, fname = tempfile.mkstemp() + with open(fname, 'w') as f: + f.write(string.ascii_lowercase) + self._test_parsed_sequence_example( + 'bytes_list', sfc.sequence_categorical_column_with_vocabulary_file, + fname, [3, 4], [compat.as_bytes(x) for x in 'acg']) + + def _test_parsed_sequence_example( + self, col_name, col_fn, col_arg, shape, values): + """Helper function to check that each FeatureColumn parses correctly. + + Args: + col_name: string, name to give to the feature column. Should match + the name that the column will parse out of the features dict. + col_fn: function used to create the feature column. For example, + sequence_numeric_column. + col_arg: second arg that the target feature column is expecting. + shape: the expected dense_shape of the feature after parsing into + a SparseTensor. + values: the expected values at index [0, 2, 6] of the feature + after parsing into a SparseTensor. 
+ """ + example = _make_sequence_example() + columns = [ + fc.categorical_column_with_identity('int_ctx', num_buckets=100), + fc.numeric_column('float_ctx'), + col_fn(col_name, col_arg) + ] + context, seq_features = parsing_ops.parse_single_sequence_example( + example.SerializeToString(), + context_features=fc.make_parse_example_spec(columns[:2]), + sequence_features=fc.make_parse_example_spec(columns[2:])) + + with self.cached_session() as sess: + ctx_result, seq_result = sess.run([context, seq_features]) + self.assertEqual(list(seq_result[col_name].dense_shape), shape) + self.assertEqual( + list(seq_result[col_name].values[[0, 2, 6]]), values) + self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1]) + self.assertEqual(ctx_result['int_ctx'].values[0], 5) + self.assertEqual(list(ctx_result['float_ctx'].shape), [1]) + self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1) + + +_SEQ_EX_PROTO = """ +context { + feature { + key: "float_ctx" + value { + float_list { + value: 123.6 + } + } + } + feature { + key: "int_ctx" + value { + int64_list { + value: 5 + } + } + } +} +feature_lists { + feature_list { + key: "bytes_list" + value { + feature { + bytes_list { + value: "a" + } + } + feature { + bytes_list { + value: "b" + value: "c" + } + } + feature { + bytes_list { + value: "d" + value: "e" + value: "f" + value: "g" + } + } + } + } + feature_list { + key: "float_list" + value { + feature { + float_list { + value: 1.0 + } + } + feature { + float_list { + value: 3.0 + value: 3.0 + value: 3.0 + } + } + feature { + float_list { + value: 5.0 + value: 5.0 + value: 5.0 + value: 5.0 + value: 5.0 + } + } + } + } + feature_list { + key: "int_list" + value { + feature { + int64_list { + value: 2 + value: 2 + } + } + feature { + int64_list { + value: 4 + value: 4 + value: 4 + value: 4 + } + } + feature { + int64_list { + value: 6 + value: 6 + value: 6 + value: 6 + value: 6 + value: 6 + } + } + } + } +} +""" + + +def _make_sequence_example(): + example = 
example_pb2.SequenceExample() + return text_format.Parse(_SEQ_EX_PROTO, example) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 45d7b74046..929e83523a 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os +from absl.testing import parameterized import numpy as np from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc @@ -28,28 +29,61 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import math_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import monitored_session -class SequenceInputLayerTest(test.TestCase): +class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_a': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)), + 'sparse_input_b': sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [2, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)), + 'expected_input_layer': [ + # example 0, ids_a [2], ids_b [1] + [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],], + 'expected_sequence_length': [1, 2]}, + 
{'testcase_name': '3D', + 'sparse_input_a': sparse_tensor.SparseTensorValue( + # feature 0, ids [[2], [0, 1]] + # feature 1, ids [[0, 0], [1]] + indices=( + (0, 0, 0), (0, 1, 0), (0, 1, 1), + (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2, 0, 1, 0, 0, 1), + dense_shape=(2, 2, 2)), + 'sparse_input_b': sparse_tensor.SparseTensorValue( + # feature 0, ids [[1, 1], [1]] + # feature 1, ids [[2], [0]] + indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 1, 1, 2, 0), + dense_shape=(2, 2, 2)), + 'expected_input_layer': [ + # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] + [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]], + # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_embedding_column( + self, sparse_input_a, sparse_input_b, expected_input_layer, + expected_sequence_length): - def test_embedding_column(self): vocabulary_size = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [2, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - embedding_dimension_a = 2 embedding_values_a = ( (1., 2.), # id 0 @@ -70,14 +104,6 @@ class SequenceInputLayerTest(test.TestCase): return embedding_values return _initializer - expected_input_layer = [ - # example 0, ids_a [2], ids_b [1] - [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [2, 0] - [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], - ] - expected_sequence_length = [1, 2] - categorical_column_a = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column_a = fc.embedding_column( @@ -233,29 +259,53 @@ class SequenceInputLayerTest(test.TestCase): }, 
feature_columns=shared_embedding_columns) - def test_indicator_column(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_a': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)), + 'sparse_input_b': sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [1, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 1, 0), + dense_shape=(2, 2)), + 'expected_input_layer': [ + # example 0, ids_a [2], ids_b [1] + [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [1, 0] + [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'sparse_input_a': sparse_tensor.SparseTensorValue( + # feature 0, ids [[2], [0, 1]] + # feature 1, ids [[0, 0], [1]] + indices=( + (0, 0, 0), (0, 1, 0), (0, 1, 1), + (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2, 0, 1, 0, 0, 1), + dense_shape=(2, 2, 2)), + 'sparse_input_b': sparse_tensor.SparseTensorValue( + # feature 0, ids [[1, 1], [1]] + # feature 1, ids [[1], [0]] + indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 1, 1, 1, 0), + dense_shape=(2, 2, 2)), + 'expected_input_layer': [ + # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] + [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]], + # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -] + [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_indicator_column( + self, sparse_input_a, sparse_input_b, expected_input_layer, + expected_sequence_length): vocabulary_size_a = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) vocabulary_size_b = 2 - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [1, 0] - indices=((0, 0), 
(1, 0), (1, 1)), - values=(1, 1, 0), - dense_shape=(2, 2)) - - expected_input_layer = [ - # example 0, ids_a [2], ids_b [1] - [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [1, 0] - [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]], - ] - expected_sequence_length = [1, 2] categorical_column_a = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size_a) @@ -298,18 +348,32 @@ class SequenceInputLayerTest(test.TestCase): features={'aaa': sparse_input}, feature_columns=[indicator_column_a]) - def test_numeric_column(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_input_layer = [ - [[0.], [1.]], - [[10.], [0.]], - ] - expected_sequence_length = [2, 1] + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [0., 1] + # example 1, [10.] 
+ indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)), + 'expected_input_layer': [ + [[0.], [1.]], + [[10.], [0.]]], + 'expected_sequence_length': [2, 1]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # feature 0, ids [[20, 3], [5]] + # feature 1, ids [[3], [8]] + indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + values=(20, 3, 5., 3., 8.), + dense_shape=(2, 2, 2)), + 'expected_input_layer': [ + [[20.], [3.], [5.], [0.]], + [[3.], [0.], [8.], [0.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_numeric_column( + self, sparse_input, expected_input_layer, expected_sequence_length): numeric_column = sfc.sequence_numeric_column('aaa') input_layer, sequence_length = sfc.sequence_input_layer( @@ -321,21 +385,38 @@ class SequenceInputLayerTest(test.TestCase): self.assertAllEqual( expected_sequence_length, sequence_length.eval(session=sess)) - def test_numeric_column_multi_dim(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.] + # example 1, [10., 11., 12., 13.] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)), + 'expected_input_layer': [ + # The output of numeric_column._get_dense_tensor should be flattened. 
+ [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]]], + 'expected_sequence_length': [2, 1]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] + # example 1, [[10., 11., 12., 13.], []] + indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), + (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), + (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 2, 4)), + 'expected_input_layer': [ + # The output of numeric_column._get_dense_tensor should be flattened. + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]]], + 'expected_sequence_length': [2, 1]}, + ) + def test_numeric_column_multi_dim( + self, sparse_input, expected_input_layer, expected_sequence_length): """Tests sequence_input_layer for multi-dimensional numeric_column.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), - (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)) - # The output of numeric_column._get_dense_tensor should be flattened. 
- expected_input_layer = [ - [[0., 1., 2., 3.], [4., 5., 6., 7.]], - [[10., 11., 12., 13.], [0., 0., 0., 0.]], - ] - expected_sequence_length = [2, 1] numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) input_layer, sequence_length = sfc.sequence_input_layer( @@ -377,6 +458,134 @@ class SequenceInputLayerTest(test.TestCase): r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'): sess.run(sequence_length) + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)), + 'expected_shape': [2, 2, 4]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] + # example 1, [[10., 11., 12., 13.], []] + indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), + (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2), + (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 2, 4)), + 'expected_shape': [2, 2, 4]}, + ) + def test_static_shape_from_tensors_numeric( + self, sparse_input, expected_shape): + """Tests that we return a known static shape when we have one.""" + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + input_layer, _ = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + shape = input_layer.get_shape() + self.assertEqual(shape, expected_shape) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 
0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)), + 'expected_shape': [4, 2, 3]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [0, 2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + values=(2, 0, 1, 2, 1, 0, 2), + dense_shape=(4, 2, 2)), + 'expected_shape': [4, 2, 3]} + ) + def test_static_shape_from_tensors_indicator( + self, sparse_input, expected_shape): + """Tests that we return a known static shape when we have one.""" + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=3) + indicator_column = fc.indicator_column(categorical_column) + + input_layer, _ = sfc.sequence_input_layer( + features={'aaa': sparse_input}, feature_columns=[indicator_column]) + shape = input_layer.get_shape() + self.assertEqual(shape, expected_shape) + + +class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase): + """Tests the utility fn concatenate_context_input.""" + + def test_concatenate_context_input(self): + seq_input = ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2)) + context_input = ops.convert_to_tensor(np.arange(10).reshape(2, 5)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + input_layer = sfc.concatenate_context_input(context_input, seq_input) + + expected = np.array([ + [[0, 1, 0, 1, 2, 3, 4], [2, 3, 0, 1, 2, 3, 4], [4, 5, 0, 1, 2, 3, 4]], + [[6, 7, 5, 6, 7, 8, 9], [8, 9, 5, 6, 7, 8, 9], [10, 11, 5, 6, 7, 8, 9]] + ], dtype=np.float32) + with monitored_session.MonitoredSession() as sess: + output = sess.run(input_layer) + self.assertAllEqual(expected, output) + + @parameterized.named_parameters( + {'testcase_name': 'rank_lt_3', + 'seq_input': ops.convert_to_tensor(np.arange(100).reshape(10, 10))}, + {'testcase_name': 'rank_gt_3', + 'seq_input': 
ops.convert_to_tensor(np.arange(100).reshape(5, 5, 2, 2))} + ) + def test_sequence_input_throws_error(self, seq_input): + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp(ValueError, 'sequence_input must have rank 3'): + sfc.concatenate_context_input(context_input, seq_input) + + @parameterized.named_parameters( + {'testcase_name': 'rank_lt_2', + 'context_input': ops.convert_to_tensor(np.arange(100))}, + {'testcase_name': 'rank_gt_2', + 'context_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))} + ) + def test_context_input_throws_error(self, context_input): + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp(ValueError, 'context_input must have rank 2'): + sfc.concatenate_context_input(context_input, seq_input) + + def test_integer_seq_input_throws_error(self): + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp( + TypeError, 'sequence_input must have dtype float32'): + sfc.concatenate_context_input(context_input, seq_input) + + def test_integer_context_input_throws_error(self): + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + with self.assertRaisesRegexp( + TypeError, 'context_input must have dtype float32'): + sfc.concatenate_context_input(context_input, seq_input) + class InputLayerTest(test.TestCase): """Tests input_layer with sequence feature columns.""" 
@@ -443,75 +652,79 @@ def _assert_sparse_tensor_indices_shape(test_case, expected, actual): test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) -class SequenceCategoricalColumnWithIdentityTest(test.TestCase): - - def test_get_sparse_tensors(self): - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((1, 2, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) +class SequenceCategoricalColumnWithIdentityTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((1, 2, 0), dtype=np.int64), + dense_shape=(2, 2, 1))}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=(6, 7, 8), + dense_shape=(2, 2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=(6, 7, 8), + dense_shape=(2, 2, 2))} + ) + def test_get_sparse_tensors(self, inputs, expected): + column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - def test_get_sparse_tensors_inputs3d(self): - """Tests _get_sparse_tensors when the input is already 3D Tensor.""" - column = sfc.sequence_categorical_column_with_identity( - 'aaa', 
num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=(1, 2, 0), - dense_shape=(2, 2, 1)) - - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'Column aaa expected ID tensor of rank 2\.\s*' - r'id_tensor shape:\s*\[2 2 1\]'): - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({'aaa': inputs})) - with monitored_session.MonitoredSession() as sess: - id_weight_pair.id_tensor.eval(session=sess) - - -class SequenceCategoricalColumnWithHashBucketTest(test.TestCase): - - def test_get_sparse_tensors(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceCategoricalColumnWithHashBucketTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('omar', 'stringer', 'marlo'), + dense_shape=(2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + # Ignored to avoid hash dependence in test. + values=np.array((0, 0, 0), dtype=np.int64), + dense_shape=(2, 2, 1))}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=('omar', 'stringer', 'marlo'), + dense_shape=(2, 2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + # Ignored to avoid hash dependence in test. + values=np.array((0, 0, 0), dtype=np.int64), + dense_shape=(2, 2, 2))} + ) + def test_get_sparse_tensors(self, inputs, expected): column = sfc.sequence_categorical_column_with_hash_bucket( 'aaa', hash_bucket_size=10) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('omar', 'stringer', 'marlo'), - dense_shape=(2, 2)) - - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - # Ignored to avoid hash dependence in test. 
- values=np.array((0, 0, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_indices_shape( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) + self, expected, id_weight_pair.id_tensor.eval(session=sess)) -class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): +class SequenceCategoricalColumnWithVocabularyFileTest( + test.TestCase, parameterized.TestCase): def _write_vocab(self, vocab_strings, file_name): vocab_file = os.path.join(self.get_temp_dir(), file_name) @@ -527,68 +740,120 @@ class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): 'wire_vocabulary.txt') self._wire_vocabulary_size = 3 - def test_get_sparse_tensors(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=(2, 2, 1))}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=('omar', 'skywalker', 'marlo'), + dense_shape=(2, 2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=np.array((0, -1, 2), dtype=np.int64), + dense_shape=(2, 2, 2))} + ) + def test_get_sparse_tensors(self, inputs, expected): column = sfc.sequence_categorical_column_with_vocabulary_file( key='aaa', vocabulary_file=self._wire_vocabulary_file_name, vocabulary_size=self._wire_vocabulary_size) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - expected_sparse_ids = 
sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - -class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase): - - def test_get_sparse_tensors(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceCategoricalColumnWithVocabularyListTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=(2, 2, 1))}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=('omar', 'skywalker', 'marlo'), + dense_shape=(2, 2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=np.array((0, -1, 2), dtype=np.int64), + dense_shape=(2, 2, 2))} + ) + def test_get_sparse_tensors(self, inputs, expected): column = sfc.sequence_categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': 
inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - -class SequenceEmbeddingColumnTest(test.TestCase): - - def test_get_sequence_dense_tensor(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceEmbeddingColumnTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)), + 'expected': [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], [0., 0.]]]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [0, 2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + values=(2, 0, 1, 2, 1, 0, 2), + dense_shape=(4, 2, 2)), + 'expected': [ + # example 0, ids [[2]] + [[7., 11.], [0., 0.]], + # example 1, ids [[0, 1], [2]] + [[2, 3.5], [7., 11.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [[1], [0, 2]] + [[3., 5.], [4., 6.5]]]} + ) + def test_get_sequence_dense_tensor(self, inputs, expected): vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)) - embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 @@ -601,17 +866,6 @@ class SequenceEmbeddingColumnTest(test.TestCase): 
self.assertIsNone(partition_info) return embedding_values - expected_lookups = [ - # example 0, ids [2] - [[7., 11.], [0., 0.]], - # example 1, ids [0, 1] - [[1., 2.], [3., 5.]], - # example 2, ids [] - [[0., 0.], [0., 0.]], - # example 3, ids [1] - [[3., 5.], [0., 0.]], - ] - categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column = fc.embedding_column( @@ -619,24 +873,35 @@ class SequenceEmbeddingColumnTest(test.TestCase): initializer=_initializer) embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertItemsEqual( ('embedding_weights:0',), tuple([v.name for v in global_vars])) with monitored_session.MonitoredSession() as sess: self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) - self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) - - def test_sequence_length(self): + self.assertAllEqual(expected, embedding_lookup.eval(session=sess)) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)), + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2, 0, 1, 2), + dense_shape=(2, 2, 2)), + 'expected_sequence_length': [1, 2]} + ) + def test_sequence_length(self, inputs, expected_sequence_length): vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] categorical_column = 
sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) @@ -644,7 +909,7 @@ class SequenceEmbeddingColumnTest(test.TestCase): categorical_column, dimension=2) _, sequence_length = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) @@ -855,56 +1120,87 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase): expected_sequence_length_b, sequence_length_b.eval(session=sess)) -class SequenceIndicatorColumnTest(test.TestCase): - - def test_get_sequence_dense_tensor(self): +class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)), + 'expected': [ + # example 0, ids [2] + [[0., 0., 1.], [0., 0., 0.]], + # example 1, ids [0, 1] + [[1., 0., 0.], [0., 1., 0.]], + # example 2, ids [] + [[0., 0., 0.], [0., 0., 0.]], + # example 3, ids [1] + [[0., 1., 0.], [0., 0., 0.]]]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [2, 2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + values=(2, 0, 1, 2, 1, 2, 2), + dense_shape=(4, 2, 2)), + 'expected': [ + # example 0, ids [[2]] + [[0., 0., 1.], [0., 0., 0.]], + # example 1, ids [[0, 1], [2]] + [[1., 1., 0.], [0., 0., 1.]], + # example 2, ids [] + [[0., 0., 0.], [0., 0., 0.]], + # example 3, ids [[1], [2, 2]] + [[0., 1., 0.], [0., 0., 2.]]]} + ) + def test_get_sequence_dense_tensor(self, inputs, expected): vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( 
- # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)) - - expected_lookups = [ - # example 0, ids [2] - [[0., 0., 1.], [0., 0., 0.]], - # example 1, ids [0, 1] - [[1., 0., 0.], [0., 1., 0.]], - # example 2, ids [] - [[0., 0., 0.], [0., 0., 0.]], - # example 3, ids [1] - [[0., 1., 0.], [0., 0., 0.]], - ] categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) indicator_column = fc.indicator_column(categorical_column) indicator_tensor, _ = indicator_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess)) - - def test_sequence_length(self): + self.assertAllEqual(expected, indicator_tensor.eval(session=sess)) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)), + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2, 0, 1, 2), + dense_shape=(2, 2, 2)), + 'expected_sequence_length': [1, 2]} + ) + def test_sequence_length(self, inputs, expected_sequence_length): vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) indicator_column = fc.indicator_column(categorical_column) _, 
sequence_length = indicator_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) @@ -938,7 +1234,7 @@ class SequenceIndicatorColumnTest(test.TestCase): expected_sequence_length, sequence_length.eval(session=sess)) -class SequenceNumericColumnTest(test.TestCase): +class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase): def test_defaults(self): a = sfc.sequence_numeric_column('aaa') @@ -971,25 +1267,36 @@ class SequenceNumericColumnTest(test.TestCase): with self.assertRaisesRegexp(TypeError, 'must be a callable'): sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable') - def test_get_sequence_dense_tensor(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_dense_tensor = [ - [[0.], [1.]], - [[10.], [0.]], - ] + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, values [0., 1] + # example 1, [10.] 
+ indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)), + 'expected': [ + [[0.], [1.]], + [[10.], [0.]]]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # feature 0, ids [[20, 3], [5]] + # feature 1, ids [[3], [8]] + indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + values=(20, 3, 5., 3., 8.), + dense_shape=(2, 2, 2)), + 'expected': [ + [[20.], [3.], [5.], [0.]], + [[3.], [0.], [8.], [0.]]]}, + ) + def test_get_sequence_dense_tensor(self, inputs, expected): numeric_column = sfc.sequence_numeric_column('aaa') dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) + self.assertAllEqual(expected, dense_tensor.eval(session=sess)) def test_get_sequence_dense_tensor_with_normalizer_fn(self): @@ -1026,41 +1333,34 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, dense_tensor.eval(session=sess)) - def test_get_sequence_dense_tensor_with_shape(self): - """Tests get_sequence_dense_tensor with shape !=(1,).""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0., 1., 2.], [3., 4., 5.]] - # example 1, [[10., 11., 12.]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), - (1, 0), (1, 1), (1, 2)), - values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), - dense_shape=(2, 6)) - expected_dense_tensor = [ - [[0., 1., 2.], [3., 4., 5.]], - [[10., 11., 12.], [0., 0., 0.]], - ] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) - - dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) - - def test_get_dense_tensor_multi_dim(self): + 
@parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)), + 'expected_dense_tensor': [ + [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], + [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6), + (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6), + (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 2, 8)), + 'expected_dense_tensor': [ + [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]], + [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]], + [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]], + [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]}, + ) + def test_get_dense_tensor_multi_dim( + self, sparse_input, expected_dense_tensor): """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), - (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)) - expected_dense_tensor = [ - [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], - [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], - ] numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) dense_tensor, _ = numeric_column._get_sequence_dense_tensor( @@ -1070,43 +1370,55 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, 
dense_tensor.eval(session=sess)) - def test_sequence_length(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0., 1., 2.], [3., 4., 5.]] - # example 1, [[10., 11., 12.]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), - (1, 0), (1, 1), (1, 2)), - values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), - dense_shape=(2, 6)) - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2., 0., 1.), + dense_shape=(2, 2)), + 'expected_sequence_length': [1, 2], + 'shape': (1,)}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2., 0., 1., 2.), + dense_shape=(2, 2, 2)), + 'expected_sequence_length': [1, 2], + 'shape': (1,)}, + {'testcase_name': '2D_with_shape', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2., 0., 1.), + dense_shape=(2, 2)), + 'expected_sequence_length': [1, 1], + 'shape': (2,)}, + {'testcase_name': '3D_with_shape', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2., 0., 1., 2.), + dense_shape=(2, 2, 2)), + 'expected_sequence_length': [1, 2], + 'shape': (2,)}, + ) + def test_sequence_length(self, inputs, expected_sequence_length, shape): + numeric_column = sfc.sequence_numeric_column('aaa', shape=shape) _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) 
self.assertAllEqual(expected_sequence_length, sequence_length) self.assertEqual(np.int64, sequence_length.dtype) - def test_sequence_length_with_shape(self): - """Tests _sequence_length with shape !=(1,).""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa') - - _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - def test_sequence_length_with_empty_rows(self): """Tests _sequence_length when some examples do not have ids.""" sparse_input = sparse_tensor.SparseTensorValue( diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 5352796174..28a8286544 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -2660,6 +2660,7 @@ class _EmbeddingColumn( inputs=inputs, weight_collections=weight_collections, trainable=trainable) + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access sequence_length = _sequence_length_from_sparse_tensor( sparse_tensors.id_tensor) @@ -3383,6 +3384,16 @@ class _IndicatorColumn(_DenseColumn, _SequenceDenseColumn, def _verify_static_batch_size_equality(tensors, columns): + """Validates that the first dim (batch size) of all tensors are equal or None. + + Args: + tensors: list of tensors to check. + columns: list of feature columns matching tensors. Will be used for error + messaging. + + Raises: + ValueError: if one of the tensors has a variant batch size + """ # bath_size is a tf.Dimension object. 
expected_batch_size = None for i in range(0, len(tensors)): @@ -3403,9 +3414,18 @@ def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): with ops.name_scope(None, 'sequence_length') as name_scope: row_ids = sp_tensor.indices[:, 0] column_ids = sp_tensor.indices[:, 1] + # Add one to convert column indices to element length column_ids += array_ops.ones_like(column_ids) - seq_length = math_ops.to_int64( - math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) + # Get the number of elements we will have per example/row + seq_length = math_ops.segment_max(column_ids, segment_ids=row_ids) + + # The raw values are grouped according to num_elements; + # how many entities will we have after grouping? + # Example: orig tensor [[1, 2], [3]], col_ids = (0, 1, 1), + # row_ids = (0, 0, 1), seq_length = [2, 1]. If num_elements = 2, + # these will get grouped, and the final seq_length is [1, 1] + seq_length = math_ops.to_int64(math_ops.ceil(seq_length / num_elements)) + # If the last n rows do not have ids, seq_length will have shape # [batch_size - n]. Pad the remaining values with zeros. n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] @@ -3439,25 +3459,14 @@ class _SequenceCategoricalColumn( sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access id_tensor = sparse_tensors.id_tensor weight_tensor = sparse_tensors.weight_tensor - # Expands final dimension, so that embeddings are not combined during - # embedding lookup. - check_id_rank = check_ops.assert_equal( - array_ops.rank(id_tensor), 2, - data=[ - 'Column {} expected ID tensor of rank 2. 
'.format(self.name), - 'id_tensor shape: ', array_ops.shape(id_tensor)]) - with ops.control_dependencies([check_id_rank]): - id_tensor = sparse_ops.sparse_reshape( - id_tensor, - shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) + + # Expands third dimension, if necessary so that embeddings are not + # combined during embedding lookup. If the tensor is already 3D, leave + # as-is. + shape = array_ops.shape(id_tensor) + target_shape = [shape[0], shape[1], -1] + id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape) if weight_tensor is not None: - check_weight_rank = check_ops.assert_equal( - array_ops.rank(weight_tensor), 2, - data=[ - 'Column {} expected weight tensor of rank 2.'.format(self.name), - 'weight_tensor shape:', array_ops.shape(weight_tensor)]) - with ops.control_dependencies([check_weight_rank]): - weight_tensor = sparse_ops.sparse_reshape( - weight_tensor, - shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) + weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape) + return _CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index ff50fe0d09..a2da6412ed 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -217,21 +217,21 @@ def _features_to_raw_params(features, types): feature = features[key] if isinstance(feature, VarLenFeature): if VarLenFeature not in types: - raise ValueError("Unsupported VarLenFeature %s." % feature) + raise ValueError("Unsupported VarLenFeature %s." % (feature,)) if not feature.dtype: raise ValueError("Missing type for feature %s." % key) sparse_keys.append(key) sparse_types.append(feature.dtype) elif isinstance(feature, SparseFeature): if SparseFeature not in types: - raise ValueError("Unsupported SparseFeature %s." % feature) + raise ValueError("Unsupported SparseFeature %s." 
% (feature,)) if not feature.index_key: raise ValueError( - "Missing index_key for SparseFeature %s." % feature) + "Missing index_key for SparseFeature %s." % (feature,)) if not feature.value_key: raise ValueError( - "Missing value_key for SparseFeature %s." % feature) + "Missing value_key for SparseFeature %s." % (feature,)) if not feature.dtype: raise ValueError("Missing type for feature %s." % key) index_keys = feature.index_key @@ -260,7 +260,7 @@ def _features_to_raw_params(features, types): sparse_types.append(feature.dtype) elif isinstance(feature, FixedLenFeature): if FixedLenFeature not in types: - raise ValueError("Unsupported FixedLenFeature %s." % feature) + raise ValueError("Unsupported FixedLenFeature %s." % (feature,)) if not feature.dtype: raise ValueError("Missing type for feature %s." % key) if feature.shape is None: @@ -281,7 +281,8 @@ def _features_to_raw_params(features, types): dense_defaults[key] = feature.default_value elif isinstance(feature, FixedLenSequenceFeature): if FixedLenSequenceFeature not in types: - raise ValueError("Unsupported FixedLenSequenceFeature %s." % feature) + raise ValueError("Unsupported FixedLenSequenceFeature %s." % ( + feature,)) if not feature.dtype: raise ValueError("Missing type for feature %s." % key) if feature.shape is None: -- GitLab From 8ef3e7c8c053cb6dad530e13c478bbd406ea2c95 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 8 Oct 2018 10:43:01 -0700 Subject: [PATCH 065/411] Part 1/3 of the feature sync to the Keras 2.2.4 API. 
PiperOrigin-RevId: 216211279 --- tensorflow/python/keras/activations.py | 5 + tensorflow/python/keras/activations_test.py | 10 + tensorflow/python/keras/backend.py | 81 ++++++-- tensorflow/python/keras/backend_test.py | 44 ++++- tensorflow/python/keras/callbacks.py | 4 + tensorflow/python/keras/engine/network.py | 9 +- .../python/keras/layers/convolutional.py | 177 ++++++++++++----- .../python/keras/layers/convolutional_test.py | 31 +++ tensorflow/python/keras/layers/pooling.py | 185 +++++++++++++----- .../python/keras/layers/pooling_test.py | 30 +++ tensorflow/python/keras/layers/wrappers.py | 3 + tensorflow/python/keras/testing_utils.py | 5 +- tensorflow/python/keras/utils/conv_utils.py | 45 +++-- .../python/keras/utils/multi_gpu_utils.py | 17 +- .../keras/utils/multi_gpu_utils_test.py | 26 +++ tensorflow/python/keras/utils/np_utils.py | 5 +- .../v1/tensorflow.keras.activations.pbtxt | 4 + .../golden/v1/tensorflow.keras.backend.pbtxt | 4 +- ...low.keras.layers.-average-pooling1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool1-d.pbtxt | 2 +- ...flow.keras.layers.-conv2-d-transpose.pbtxt | 2 +- ...flow.keras.layers.-conv3-d-transpose.pbtxt | 2 +- ...ras.layers.-convolution2-d-transpose.pbtxt | 2 +- ...ras.layers.-convolution3-d-transpose.pbtxt | 2 +- ...as.layers.-global-average-pooling1-d.pbtxt | 4 +- ...low.keras.layers.-global-avg-pool1-d.pbtxt | 4 +- ...low.keras.layers.-global-max-pool1-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool1-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling1-d.pbtxt | 2 +- .../golden/v1/tensorflow.keras.utils.pbtxt | 2 +- .../v2/tensorflow.keras.activations.pbtxt | 4 + .../golden/v2/tensorflow.keras.backend.pbtxt | 4 +- ...low.keras.layers.-average-pooling1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool1-d.pbtxt | 2 +- ...flow.keras.layers.-conv2-d-transpose.pbtxt | 2 +- ...flow.keras.layers.-conv3-d-transpose.pbtxt | 2 +- 
...ras.layers.-convolution2-d-transpose.pbtxt | 2 +- ...ras.layers.-convolution3-d-transpose.pbtxt | 2 +- ...as.layers.-global-average-pooling1-d.pbtxt | 4 +- ...low.keras.layers.-global-avg-pool1-d.pbtxt | 4 +- ...low.keras.layers.-global-max-pool1-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool1-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling1-d.pbtxt | 2 +- .../golden/v2/tensorflow.keras.utils.pbtxt | 2 +- 46 files changed, 581 insertions(+), 172 deletions(-) diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 99645de736..d69791ce8d 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -160,6 +160,11 @@ def sigmoid(x): return nn.sigmoid(x) +@tf_export('keras.activations.exponential') +def exponential(x): + return math_ops.exp(x) + + @tf_export('keras.activations.hard_sigmoid') def hard_sigmoid(x): """Hard sigmoid activation function. 
diff --git a/tensorflow/python/keras/activations_test.py b/tensorflow/python/keras/activations_test.py index dd0bbcff39..ad238cb0a9 100644 --- a/tensorflow/python/keras/activations_test.py +++ b/tensorflow/python/keras/activations_test.py @@ -169,6 +169,16 @@ class KerasActivationsTest(test.TestCase): expected = np.tanh(test_values) self.assertAllClose(result, expected, rtol=1e-05) + def test_exponential(self): + with self.cached_session(): + test_values = np.random.random((2, 5)) + x = keras.backend.placeholder(ndim=2) + exp = keras.activations.exponential(x) + f = keras.backend.function([x], [exp]) + result = f([test_values])[0] + expected = np.exp(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + def test_linear(self): x = np.random.random((10, 5)) self.assertAllClose(x, keras.activations.linear(x)) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 63e776a06b..13f52fbae7 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -2223,7 +2223,7 @@ def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3): @tf_export('keras.backend.batch_normalization') -def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3): +def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): """Applies batch normalization on x given mean, var, beta and gamma. I.e. returns: @@ -2235,11 +2235,49 @@ def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3): var: Variance of batch. beta: Tensor with which to center the input. gamma: Tensor by which to scale the input. + axis: Integer, the axis that should be normalized. + (typically the features axis). epsilon: Fuzz factor. Returns: A tensor. 
""" + if ndim(x) == 4: + # The CPU implementation of `fused_batch_norm` only supports NHWC + if axis == 1 or axis == -3: + tf_data_format = 'NCHW' + elif axis == 3 or axis == -1: + tf_data_format = 'NHWC' + else: + tf_data_format = None + + if (tf_data_format == 'NHWC' or + tf_data_format == 'NCHW' and _has_nchw_support()): + # The mean / var / beta / gamma tensors may be broadcasted + # so they may have extra axes of size 1, which should be squeezed. + if ndim(mean) > 1: + mean = array_ops.reshape(mean, [-1]) + if ndim(var) > 1: + var = array_ops.reshape(var, [-1]) + if beta is None: + beta = zeros_like(mean) + elif ndim(beta) > 1: + beta = array_ops.reshape(beta, [-1]) + if gamma is None: + gamma = ones_like(mean) + elif ndim(gamma) > 1: + gamma = array_ops.reshape(gamma, [-1]) + y, _, _ = nn.fused_batch_norm( + x, + gamma, + beta, + epsilon=epsilon, + mean=mean, + variance=var, + data_format=tf_data_format, + is_training=False + ) + return y return nn.batch_normalization(x, mean, var, beta, gamma, epsilon) @@ -2880,7 +2918,7 @@ class Function(object): if session_kwargs: raise ValueError('Some keys in session_kwargs are not supported at this ' - 'time: %s', session_kwargs.keys()) + 'time: %s', (session_kwargs.keys(),)) self._callable_fn = None self._feed_arrays = None @@ -3798,19 +3836,23 @@ def _preprocess_conv1d_input(x, data_format): return x, tf_data_format -def _preprocess_conv2d_input(x, data_format): +def _preprocess_conv2d_input(x, data_format, force_transpose=False): """Transpose and cast the input before the conv2d. Arguments: x: input tensor. data_format: string, `"channels_last"` or `"channels_first"`. + force_transpose: Boolean. If True, the input will always be transposed + from NCHW to NHWC if `data_format` is `"channels_first"`. + If False, the transposition only occurs on CPU (GPU ops are + assumed to support NCHW). Returns: A tensor. 
""" tf_data_format = 'NHWC' if data_format == 'channels_first': - if not _has_nchw_support(): + if not _has_nchw_support() or force_transpose: x = array_ops.transpose(x, (0, 2, 3, 1)) # NCHW -> NHWC else: tf_data_format = 'NCHW' @@ -3958,7 +4000,8 @@ def conv2d_transpose(x, output_shape, strides=(1, 1), padding='valid', - data_format=None): + data_format=None, + dilation_rate=(1, 1)): """2D deconvolution (i.e. transposed convolution). @@ -3972,6 +4015,7 @@ def conv2d_transpose(x, data_format: string, `"channels_last"` or `"channels_first"`. Whether to use Theano or TensorFlow/CNTK data format for inputs/kernels/outputs. + dilation_rate: Tuple of 2 integers. Returns: A tensor, result of transposed 2D convolution. @@ -3987,7 +4031,13 @@ def conv2d_transpose(x, if isinstance(output_shape, (tuple, list)): output_shape = array_ops.stack(output_shape) - x, tf_data_format = _preprocess_conv2d_input(x, data_format) + # `atrous_conv2d_transpose` only supports NHWC format, even on GPU. + if data_format == 'channels_first' and dilation_rate != (1, 1): + force_transpose = True + else: + force_transpose = False + + x, tf_data_format = _preprocess_conv2d_input(x, data_format, force_transpose) if data_format == 'channels_first' and tf_data_format == 'NHWC': output_shape = (output_shape[0], output_shape[2], output_shape[3], @@ -4002,13 +4052,18 @@ def conv2d_transpose(x, else: strides = (1, 1) + strides - x = nn.conv2d_transpose( - x, - kernel, - output_shape, - strides, - padding=padding, - data_format=tf_data_format) + if dilation_rate == (1, 1): + x = nn.conv2d_transpose(x, kernel, output_shape, strides, + padding=padding, + data_format=tf_data_format) + else: + assert dilation_rate[0] == dilation_rate[1] + x = nn.atrous_conv2d_transpose( + x, + kernel, + output_shape, + rate=dilation_rate[0], + padding=padding) if data_format == 'channels_first' and tf_data_format == 'NHWC': x = array_ops.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW return x diff --git 
a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index ab71589940..0834448699 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -26,6 +26,7 @@ from tensorflow.python import keras from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import nn from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.util import tf_inspect @@ -1381,6 +1382,36 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase): self.assertEqual(mean.get_shape().as_list(), [3,]) self.assertEqual(var.get_shape().as_list(), [3,]) + def test_batch_normalization(self): + g_val = np.random.random((3,)) + b_val = np.random.random((3,)) + gamma = keras.backend.variable(g_val) + beta = keras.backend.variable(b_val) + + # 3D NHC case + val = np.random.random((10, 5, 3)) + x = keras.backend.variable(val) + mean, var = nn.moments(x, (0, 1), None, None, False) + normed = keras.backend.batch_normalization( + x, mean, var, beta, gamma, axis=-1, epsilon=1e-3) + self.assertEqual(normed.shape.as_list(), [10, 5, 3]) + + # 4D NHWC case + val = np.random.random((10, 5, 5, 3)) + x = keras.backend.variable(val) + mean, var = nn.moments(x, (0, 1, 2), None, None, False) + normed = keras.backend.batch_normalization( + x, mean, var, beta, gamma, axis=-1, epsilon=1e-3) + self.assertEqual(normed.shape.as_list(), [10, 5, 5, 3]) + + # 4D NCHW case + val = np.random.random((10, 3, 5, 5)) + x = keras.backend.variable(val) + mean, var = nn.moments(x, (0, 2, 3), None, None, False) + normed = keras.backend.batch_normalization( + x, mean, var, beta, gamma, axis=1, epsilon=1e-3) + self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5]) + class TestCTC(test.TestCase): @@ -1506,12 +1537,13 @@ class TestRandomOps(test.TestCase): self.assertAllClose(np.min(y), -2., 
atol=0.1) def test_string_input(self): - seq = keras.Sequential([ - keras.layers.InputLayer(input_shape=(1,), dtype=dtypes.string), - keras.layers.Lambda(lambda x: x[0]) - ]) - preds = seq.predict([['tensorflow eager']]) - self.assertEqual(preds.shape, (1,)) + with self.cached_session(): + seq = keras.Sequential([ + keras.layers.InputLayer(input_shape=(1,), dtype=dtypes.string), + keras.layers.Lambda(lambda x: x[0]) + ]) + preds = seq.predict([['tensorflow eager']]) + self.assertEqual(preds.shape, (1,)) if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 6dfbbf3694..3d6000f223 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -781,6 +781,10 @@ class LearningRateScheduler(Callback): print('\nEpoch %05d: LearningRateScheduler reducing learning ' 'rate to %s.' % (epoch + 1, lr)) + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + logs['lr'] = K.get_value(self.model.optimizer.lr) + @tf_export('keras.callbacks.TensorBoard') class TensorBoard(Callback): diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index 918488bd7a..5969fea2b2 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -1641,10 +1641,11 @@ class Network(base_layer.Layer): ValueError: if `summary()` is called before the model is built. """ if not self.built: - raise ValueError('This model has never been called, thus its weights ' - 'have not yet been created, so no summary can be ' - 'displayed. Build the model first ' - '(e.g. by calling it on some data).') + raise ValueError('This model has not yet been built. 
' + 'Build the model first by calling `build()` or calling ' + '`fit()` with some data, or specify ' + 'an `input_shape` argument in the first layer(s) for ' + 'automatic build.') layer_utils.print_summary(self, line_length=line_length, positions=positions, diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py index d00def07bb..8f5872385c 100644 --- a/tensorflow/python/keras/layers/convolutional.py +++ b/tensorflow/python/keras/layers/convolutional.py @@ -645,6 +645,14 @@ class Conv2DTranspose(Conv2D): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). + output_padding: An integer or tuple/list of 2 integers, + specifying the amount of padding along the height and width + of the output tensor. + Can be a single integer to specify the same value for all + spatial dimensions. + The amount of output padding along a given dimension must be + lower than the stride along that same dimension. + If set to `None` (default), the output shape is inferred. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. 
@@ -700,7 +708,9 @@ class Conv2DTranspose(Conv2D): kernel_size, strides=(1, 1), padding='valid', + output_padding=None, data_format=None, + dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', @@ -717,6 +727,7 @@ class Conv2DTranspose(Conv2D): strides=strides, padding=padding, data_format=data_format, + dilation_rate=dilation_rate, activation=activations.get(activation), use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), @@ -728,6 +739,16 @@ class Conv2DTranspose(Conv2D): bias_constraint=constraints.get(bias_constraint), **kwargs) + self.output_padding = output_padding + if self.output_padding is not None: + self.output_padding = conv_utils.normalize_tuple( + self.output_padding, 2, 'output_padding') + for stride, out_pad in zip(self.strides, self.output_padding): + if out_pad >= stride: + raise ValueError('Stride ' + str(self.strides) + ' must be ' + 'greater than output padding ' + + str(self.output_padding)) + def build(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape) if len(input_shape) != 4: @@ -769,51 +790,50 @@ class Conv2DTranspose(Conv2D): inputs_shape = array_ops.shape(inputs) batch_size = inputs_shape[0] if self.data_format == 'channels_first': - c_axis, h_axis, w_axis = 1, 2, 3 + h_axis, w_axis = 2, 3 else: - c_axis, h_axis, w_axis = 3, 1, 2 + h_axis, w_axis = 1, 2 height, width = inputs_shape[h_axis], inputs_shape[w_axis] kernel_h, kernel_w = self.kernel_size stride_h, stride_w = self.strides + if self.output_padding is None: + out_pad_h = out_pad_w = None + else: + out_pad_h, out_pad_w = self.output_padding + # Infer the dynamic output shape: out_height = conv_utils.deconv_output_length(height, kernel_h, - self.padding, - stride_h) + padding=self.padding, + output_padding=out_pad_h, + stride=stride_h, + dilation=self.dilation_rate[0]) out_width = conv_utils.deconv_output_length(width, kernel_w, - self.padding, - stride_w) + padding=self.padding, + 
output_padding=out_pad_w, + stride=stride_w, + dilation=self.dilation_rate[1]) if self.data_format == 'channels_first': output_shape = (batch_size, self.filters, out_height, out_width) - strides = (1, 1, stride_h, stride_w) else: output_shape = (batch_size, out_height, out_width, self.filters) - strides = (1, stride_h, stride_w, 1) output_shape_tensor = array_ops.stack(output_shape) - outputs = nn.conv2d_transpose( + outputs = backend.conv2d_transpose( inputs, self.kernel, output_shape_tensor, - strides, - padding=self.padding.upper(), - data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate) if not context.executing_eagerly(): # Infer the static output shape: - out_shape = inputs.get_shape().as_list() - out_shape[c_axis] = self.filters - out_shape[h_axis] = conv_utils.deconv_output_length(out_shape[h_axis], - kernel_h, - self.padding, - stride_h) - out_shape[w_axis] = conv_utils.deconv_output_length(out_shape[w_axis], - kernel_w, - self.padding, - stride_w) + out_shape = self.compute_output_shape(inputs.shape) outputs.set_shape(out_shape) if self.use_bias: @@ -837,13 +857,33 @@ class Conv2DTranspose(Conv2D): kernel_h, kernel_w = self.kernel_size stride_h, stride_w = self.strides + if self.output_padding is None: + out_pad_h = out_pad_w = None + else: + out_pad_h, out_pad_w = self.output_padding + output_shape[c_axis] = self.filters output_shape[h_axis] = conv_utils.deconv_output_length( - output_shape[h_axis], kernel_h, self.padding, stride_h) + output_shape[h_axis], + kernel_h, + padding=self.padding, + output_padding=out_pad_h, + stride=stride_h, + dilation=self.dilation_rate[0]) output_shape[w_axis] = conv_utils.deconv_output_length( - output_shape[w_axis], kernel_w, self.padding, stride_w) + output_shape[w_axis], + kernel_w, + padding=self.padding, + output_padding=out_pad_w, + stride=stride_w, + dilation=self.dilation_rate[1]) 
return tensor_shape.TensorShape(output_shape) + def get_config(self): + config = super(Conv2DTranspose, self).get_config() + config['output_padding'] = self.output_padding + return config + @tf_export('keras.layers.Conv3DTranspose', 'keras.layers.Convolution3DTranspose') @@ -878,6 +918,14 @@ class Conv3DTranspose(Conv3D): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). + output_padding: An integer or tuple/list of 3 integers, + specifying the amount of padding along the depth, height, and + width. + Can be a single integer to specify the same value for all + spatial dimensions. + The amount of output padding along a given dimension must be + lower than the stride along that same dimension. + If set to `None` (default), the output shape is inferred. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. @@ -943,6 +991,7 @@ class Conv3DTranspose(Conv3D): kernel_size, strides=(1, 1, 1), padding='valid', + output_padding=None, data_format=None, activation=None, use_bias=True, @@ -971,6 +1020,16 @@ class Conv3DTranspose(Conv3D): bias_constraint=constraints.get(bias_constraint), **kwargs) + self.output_padding = output_padding + if self.output_padding is not None: + self.output_padding = conv_utils.normalize_tuple( + self.output_padding, 3, 'output_padding') + for stride, out_pad in zip(self.strides, self.output_padding): + if out_pad >= stride: + raise ValueError('Stride ' + str(self.strides) + ' must be ' + 'greater than output padding ' + + str(self.output_padding)) + def build(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape) if len(input_shape) != 5: @@ -1012,11 +1071,9 @@ class Conv3DTranspose(Conv3D): inputs_shape = array_ops.shape(inputs) batch_size = inputs_shape[0] if self.data_format == 'channels_first': - c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4 + d_axis, h_axis, 
w_axis = 2, 3, 4 else: - c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 - - self.input_spec = InputSpec(ndim=5, axes={c_axis: inputs_shape[c_axis]}) + d_axis, h_axis, w_axis = 1, 2, 3 depth = inputs_shape[d_axis] height = inputs_shape[h_axis] @@ -1025,19 +1082,27 @@ class Conv3DTranspose(Conv3D): kernel_d, kernel_h, kernel_w = self.kernel_size stride_d, stride_h, stride_w = self.strides + if self.output_padding is None: + out_pad_d = out_pad_h = out_pad_w = None + else: + out_pad_d, out_pad_h, out_pad_w = self.output_padding + # Infer the dynamic output shape: out_depth = conv_utils.deconv_output_length(depth, kernel_d, - self.padding, - stride_d) + padding=self.padding, + output_padding=out_pad_d, + stride=stride_d) out_height = conv_utils.deconv_output_length(height, kernel_h, - self.padding, - stride_h) + padding=self.padding, + output_padding=out_pad_h, + stride=stride_h) out_width = conv_utils.deconv_output_length(width, kernel_w, - self.padding, - stride_w) + padding=self.padding, + output_padding=out_pad_w, + stride=stride_w) if self.data_format == 'channels_first': output_shape = (batch_size, self.filters, out_depth, out_height, out_width) @@ -1058,20 +1123,7 @@ class Conv3DTranspose(Conv3D): if not context.executing_eagerly(): # Infer the static output shape: - out_shape = inputs.get_shape().as_list() - out_shape[c_axis] = self.filters - out_shape[d_axis] = conv_utils.deconv_output_length(out_shape[d_axis], - kernel_d, - self.padding, - stride_d) - out_shape[h_axis] = conv_utils.deconv_output_length(out_shape[h_axis], - kernel_h, - self.padding, - stride_h) - out_shape[w_axis] = conv_utils.deconv_output_length(out_shape[w_axis], - kernel_w, - self.padding, - stride_w) + out_shape = self.compute_output_shape(inputs.shape) outputs.set_shape(out_shape) if self.use_bias: @@ -1109,15 +1161,38 @@ class Conv3DTranspose(Conv3D): kernel_d, kernel_h, kernel_w = self.kernel_size stride_d, stride_h, stride_w = self.strides + if self.output_padding is None: + out_pad_d = 
out_pad_h = out_pad_w = None + else: + out_pad_d, out_pad_h, out_pad_w = self.output_padding + output_shape[c_axis] = self.filters output_shape[d_axis] = conv_utils.deconv_output_length( - output_shape[d_axis], kernel_d, self.padding, stride_d) + output_shape[d_axis], + kernel_d, + padding=self.padding, + output_padding=out_pad_d, + stride=stride_d) output_shape[h_axis] = conv_utils.deconv_output_length( - output_shape[h_axis], kernel_h, self.padding, stride_h) + output_shape[h_axis], + kernel_h, + padding=self.padding, + output_padding=out_pad_h, + stride=stride_h) output_shape[w_axis] = conv_utils.deconv_output_length( - output_shape[w_axis], kernel_w, self.padding, stride_w) + output_shape[w_axis], + kernel_w, + padding=self.padding, + output_padding=out_pad_w, + stride=stride_w) return tensor_shape.TensorShape(output_shape) + def get_config(self): + config = super(Conv3DTranspose, self).get_config() + config.pop('dilation_rate') + config['output_padding'] = self.output_padding + return config + class SeparableConv(Conv): """Abstract base layer for separable nD convolution. 
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index cad5e4c8bd..f88d632ab5 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/layers/convolutional_test.py @@ -204,6 +204,9 @@ class Conv2DTransposeTest(test.TestCase): if test.is_gpu_available(cuda_only=True): self._run_test(kwargs, 'data_format', ['channels_first']) + kwargs['strides'] = (2, 2) + self._run_test(kwargs, 'output_padding', [(1, 1)]) + def test_conv2dtranspose_regularizers(self): kwargs = { 'filters': 3, @@ -239,6 +242,31 @@ class Conv2DTransposeTest(test.TestCase): self.assertEqual(layer.kernel.constraint, k_constraint) self.assertEqual(layer.bias.constraint, b_constraint) + @tf_test_util.run_in_graph_and_eager_modes + def test_conv2d_transpose_dilation(self): + testing_utils.layer_test(keras.layers.Conv2DTranspose, + kwargs={'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + 'data_format': 'channels_last', + 'dilation_rate': (2, 2)}, + input_shape=(2, 5, 6, 3)) + + input_data = np.arange(48).reshape((1, 4, 4, 3)).astype(np.float32) + expected_output = np.float32([[192, 228, 192, 228], + [336, 372, 336, 372], + [192, 228, 192, 228], + [336, 372, 336, 372]]).reshape((1, 4, 4, 1)) + testing_utils.layer_test(keras.layers.Conv2DTranspose, + input_data=input_data, + kwargs={'filters': 1, + 'kernel_size': 3, + 'padding': 'same', + 'data_format': 'channels_last', + 'dilation_rate': (2, 2), + 'kernel_initializer': 'ones'}, + expected_output=expected_output) + class Conv3DTransposeTest(test.TestCase): @@ -270,6 +298,9 @@ class Conv3DTransposeTest(test.TestCase): if test.is_gpu_available(cuda_only=True): self._run_test(kwargs, 'data_format', ['channels_first']) + kwargs['strides'] = (2, 2, 2) + self._run_test(kwargs, 'output_padding', [(1, 1, 1)]) + def test_conv3dtranspose_regularizers(self): kwargs = { 'filters': 3, diff --git a/tensorflow/python/keras/layers/pooling.py 
b/tensorflow/python/keras/layers/pooling.py index 912e8bd619..72a9c1d629 100644 --- a/tensorflow/python/keras/layers/pooling.py +++ b/tensorflow/python/keras/layers/pooling.py @@ -18,12 +18,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools + from tensorflow.python.framework import tensor_shape from tensorflow.python.keras import backend from tensorflow.python.keras.engine.base_layer import InputSpec from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.keras.utils import conv_utils from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.util.tf_export import tf_export @@ -41,16 +44,18 @@ class Pooling1D(Layer): strides of the pooling operation. padding: A string. The padding method, either 'valid' or 'same'. Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. + data_format: A string, + one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. name: A string, the name of the layer. """ def __init__(self, pool_function, pool_size, strides, - padding='valid', data_format=None, + padding='valid', data_format='channels_last', name=None, **kwargs): super(Pooling1D, self).__init__(name=name, **kwargs) if data_format is None: @@ -65,45 +70,39 @@ class Pooling1D(Layer): self.input_spec = InputSpec(ndim=3) def call(self, inputs): - # There is no TF op for 1D pooling, hence we make the inputs 4D. 
- if self.data_format == 'channels_last': - # input is NWC, make it NHWC - inputs = array_ops.expand_dims(inputs, 1) - # pool on the W dim - pool_shape = (1, 1) + self.pool_size + (1,) - strides = (1, 1) + self.strides + (1,) - data_format = 'NHWC' - else: - # input is NCW, make it NCHW - inputs = array_ops.expand_dims(inputs, 2) - # pool on the W dim - pool_shape = (1, 1, 1) + self.pool_size - strides = (1, 1, 1) + self.strides - data_format = 'NCHW' - + pad_axis = 2 if self.data_format == 'channels_last' else 3 + inputs = array_ops.expand_dims(inputs, pad_axis) outputs = self.pool_function( inputs, - ksize=pool_shape, - strides=strides, - padding=self.padding.upper(), - data_format=data_format) - - if self.data_format == 'channels_last': - return array_ops.squeeze(outputs, 1) - else: - return array_ops.squeeze(outputs, 2) + self.pool_size + (1,), + strides=self.strides + (1,), + padding=self.padding, + data_format=self.data_format) + return array_ops.squeeze(outputs, pad_axis) def compute_output_shape(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape).as_list() - length = conv_utils.conv_output_length(input_shape[1], self.pool_size[0], - self.padding, self.strides[0]) - return tensor_shape.TensorShape([input_shape[0], length, input_shape[2]]) + if self.data_format == 'channels_first': + steps = input_shape[2] + features = input_shape[1] + else: + steps = input_shape[1] + features = input_shape[2] + length = conv_utils.conv_output_length(steps, + self.pool_size[0], + self.padding, + self.strides[0]) + if self.data_format == 'channels_first': + return tensor_shape.TensorShape([input_shape[0], features, length]) + else: + return tensor_shape.TensorShape([input_shape[0], length, features]) def get_config(self): config = { 'strides': self.strides, 'pool_size': self.pool_size, - 'padding': self.padding + 'padding': self.padding, + 'data_format': self.data_format, } base_config = super(Pooling1D, self).get_config() return 
dict(list(base_config.items()) + list(config.items())) @@ -119,19 +118,36 @@ class MaxPooling1D(Pooling1D): E.g. 2 will halve the input. If None, it will default to `pool_size`. padding: One of `"valid"` or `"same"` (case-insensitive). + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. Input shape: - 3D tensor with shape: `(batch_size, steps, features)`. + - If `data_format='channels_last'`: + 3D tensor with shape: + `(batch_size, steps, features)` + - If `data_format='channels_first'`: + 3D tensor with shape: + `(batch_size, features, steps)` Output shape: - 3D tensor with shape: `(batch_size, downsampled_steps, features)`. + - If `data_format='channels_last'`: + 3D tensor with shape: + `(batch_size, downsampled_steps, features)` + - If `data_format='channels_first'`: + 3D tensor with shape: + `(batch_size, features, downsampled_steps)` """ def __init__(self, pool_size=2, strides=None, - padding='valid', data_format=None, **kwargs): + padding='valid', data_format='channels_last', **kwargs): super(MaxPooling1D, self).__init__( - nn.max_pool, + functools.partial(backend.pool2d, pool_mode='max'), pool_size=pool_size, strides=strides, padding=padding, @@ -149,18 +165,35 @@ class AveragePooling1D(Pooling1D): E.g. 2 will halve the input. If None, it will default to `pool_size`. padding: One of `"valid"` or `"same"` (case-insensitive). + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. Input shape: - 3D tensor with shape: `(batch_size, steps, features)`. 
+ - If `data_format='channels_last'`: + 3D tensor with shape: + `(batch_size, steps, features)` + - If `data_format='channels_first'`: + 3D tensor with shape: + `(batch_size, features, steps)` Output shape: - 3D tensor with shape: `(batch_size, downsampled_steps, features)`. + - If `data_format='channels_last'`: + 3D tensor with shape: + `(batch_size, downsampled_steps, features)` + - If `data_format='channels_first'`: + 3D tensor with shape: + `(batch_size, features, downsampled_steps)` """ def __init__(self, pool_size=2, strides=None, - padding='valid', data_format=None, **kwargs): + padding='valid', data_format='channels_last', **kwargs): super(AveragePooling1D, self).__init__( - nn.avg_pool, + functools.partial(backend.pool2d, pool_mode='avg'), pool_size=pool_size, strides=strides, padding=padding, @@ -561,41 +594,96 @@ class GlobalPooling1D(Layer): """Abstract class for different global pooling 1D layers. """ - def __init__(self, **kwargs): + def __init__(self, data_format='channels_last', **kwargs): super(GlobalPooling1D, self).__init__(**kwargs) self.input_spec = InputSpec(ndim=3) + self.data_format = conv_utils.normalize_data_format(data_format) def compute_output_shape(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape).as_list() - return tensor_shape.TensorShape([input_shape[0], input_shape[2]]) + if self.data_format == 'channels_first': + return tensor_shape.TensorShape([input_shape[0], input_shape[1]]) + else: + return tensor_shape.TensorShape([input_shape[0], input_shape[2]]) def call(self, inputs): raise NotImplementedError + def get_config(self): + config = {'data_format': self.data_format} + base_config = super(GlobalPooling1D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + @tf_export('keras.layers.GlobalAveragePooling1D', 'keras.layers.GlobalAvgPool1D') class GlobalAveragePooling1D(GlobalPooling1D): """Global average pooling operation for temporal data. 
+ Arguments: + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. + Input shape: - 3D tensor with shape: `(batch_size, steps, features)`. + - If `data_format='channels_last'`: + 3D tensor with shape: + `(batch_size, steps, features)` + - If `data_format='channels_first'`: + 3D tensor with shape: + `(batch_size, features, steps)` Output shape: 2D tensor with shape: `(batch_size, features)` """ - def call(self, inputs): - return backend.mean(inputs, axis=1) + def __init__(self, data_format='channels_last', **kwargs): + super(GlobalAveragePooling1D, self).__init__(data_format=data_format, + **kwargs) + self.supports_masking = True + + def call(self, inputs, mask=None): + steps_axis = 1 if self.data_format == 'channels_last' else 2 + if mask is not None: + mask = math_ops.cast(mask, backend.floatx()) + input_shape = inputs.shape.as_list() + broadcast_shape = [-1, input_shape[steps_axis], 1] + mask = array_ops.reshape(mask, broadcast_shape) + inputs *= mask + return backend.sum(inputs, axis=steps_axis) / math_ops.reduce_sum( + mask, axis=steps_axis) + else: + return backend.mean(inputs, axis=steps_axis) + + def compute_mask(self, inputs, mask=None): + return None @tf_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D') class GlobalMaxPooling1D(GlobalPooling1D): """Global max pooling operation for temporal data. + Arguments: + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. + Input shape: - 3D tensor with shape: `(batch_size, steps, features)`. 
+ - If `data_format='channels_last'`: + 3D tensor with shape: + `(batch_size, steps, features)` + - If `data_format='channels_first'`: + 3D tensor with shape: + `(batch_size, features, steps)` Output shape: 2D tensor with shape: @@ -603,7 +691,8 @@ class GlobalMaxPooling1D(GlobalPooling1D): """ def call(self, inputs): - return backend.max(inputs, axis=1) + steps_axis = 1 if self.data_format == 'channels_last' else 2 + return backend.max(inputs, axis=steps_axis) class GlobalPooling2D(Layer): diff --git a/tensorflow/python/keras/layers/pooling_test.py b/tensorflow/python/keras/layers/pooling_test.py index 2cd9939e66..936e73ecf9 100644 --- a/tensorflow/python/keras/layers/pooling_test.py +++ b/tensorflow/python/keras/layers/pooling_test.py @@ -18,11 +18,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python import keras from tensorflow.python.eager import context from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras import testing_utils from tensorflow.python.platform import test +from tensorflow.python.training import rmsprop class GlobalPoolingTest(test.TestCase): @@ -31,8 +34,26 @@ class GlobalPoolingTest(test.TestCase): def test_globalpooling_1d(self): testing_utils.layer_test(keras.layers.pooling.GlobalMaxPooling1D, input_shape=(3, 4, 5)) + testing_utils.layer_test(keras.layers.pooling.GlobalMaxPooling1D, + kwargs={'data_format': 'channels_first'}, + input_shape=(3, 4, 5)) testing_utils.layer_test( keras.layers.pooling.GlobalAveragePooling1D, input_shape=(3, 4, 5)) + testing_utils.layer_test(keras.layers.pooling.GlobalAveragePooling1D, + kwargs={'data_format': 'channels_first'}, + input_shape=(3, 4, 5)) + + @tf_test_util.run_in_graph_and_eager_modes + def test_globalpooling_1d_masking_support(self): + model = keras.Sequential() + model.add(keras.layers.Masking(mask_value=0., input_shape=(3, 4))) + 
model.add(keras.layers.GlobalAveragePooling1D()) + model.compile(loss='mae', optimizer=rmsprop.RMSPropOptimizer(0.001)) + + model_input = np.random.random((2, 3, 4)) + model_input[0, 1:, :] = 0 + output = model.predict(model_input) + self.assertAllClose(output[0], model_input[0, 0, :]) @tf_test_util.run_in_graph_and_eager_modes def test_globalpooling_2d(self): @@ -172,6 +193,10 @@ class Pooling1DTest(test.TestCase): kwargs={'strides': stride, 'padding': padding}, input_shape=(3, 5, 4)) + testing_utils.layer_test( + keras.layers.MaxPooling1D, + kwargs={'data_format': 'channels_first'}, + input_shape=(3, 2, 6)) @tf_test_util.run_in_graph_and_eager_modes def test_averagepooling_1d(self): @@ -183,6 +208,11 @@ class Pooling1DTest(test.TestCase): 'padding': padding}, input_shape=(3, 5, 4)) + testing_utils.layer_test( + keras.layers.AveragePooling1D, + kwargs={'data_format': 'channels_first'}, + input_shape=(3, 2, 6)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py index a1933c11b0..d19d0b5f8c 100644 --- a/tensorflow/python/keras/layers/wrappers.py +++ b/tensorflow/python/keras/layers/wrappers.py @@ -587,6 +587,9 @@ class Bidirectional(Wrapper): output = y * y_rev elif self.merge_mode is None: output = [y, y_rev] + else: + raise ValueError( + 'Unrecognized value for `merge_mode`: %s' % (self.merge_mode)) # Properly set learning phase if (getattr(y, '_uses_learning_phase', False) or diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py index 501b50ba5f..2fae094a1e 100644 --- a/tensorflow/python/keras/testing_utils.py +++ b/tensorflow/python/keras/testing_utils.py @@ -166,8 +166,9 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, if expected_dim is not None: if expected_dim != actual_dim: raise AssertionError( - 'When testing layer %s, for input %s, found output_shape=' - '%s but expected to find %s.\nFull 
kwargs: %s' % + 'When testing layer %s **after deserialization**, ' + 'for input %s, found output_shape=' + '%s but expected to find inferred shape %s.\nFull kwargs: %s' % (layer_cls.__name__, x, actual_output_shape, diff --git a/tensorflow/python/keras/utils/conv_utils.py b/tensorflow/python/keras/utils/conv_utils.py index 8ebca1418d..f486e631e5 100644 --- a/tensorflow/python/keras/utils/conv_utils.py +++ b/tensorflow/python/keras/utils/conv_utils.py @@ -137,26 +137,49 @@ def conv_input_length(output_length, filter_size, padding, stride): return (output_length - 1) * stride - 2 * pad + filter_size -def deconv_output_length(input_length, filter_size, padding, stride): +def deconv_output_length(input_length, filter_size, padding, + output_padding=None, stride=0, dilation=1): """Determines output length of a transposed convolution given input length. Arguments: - input_length: integer. - filter_size: integer. - padding: one of "same", "valid", "full". - stride: integer. + input_length: Integer. + filter_size: Integer. + padding: one of `"same"`, `"valid"`, `"full"`. + output_padding: Integer, amount of padding along the output dimension. + Can be set to `None` in which case the output length is inferred. + stride: Integer. + dilation: Integer. Returns: The output length (integer). 
""" + assert padding in {'same', 'valid', 'full'} if input_length is None: return None - input_length *= stride - if padding == 'valid': - input_length += max(filter_size - stride, 0) - elif padding == 'full': - input_length -= (stride + filter_size - 2) - return input_length + + # Get the dilated kernel size + filter_size = filter_size + (filter_size - 1) * (dilation - 1) + + # Infer length if output padding is None, else compute the exact length + if output_padding is None: + if padding == 'valid': + length = input_length * stride + max(filter_size - stride, 0) + elif padding == 'full': + length = input_length * stride - (stride + filter_size - 2) + elif padding == 'same': + length = input_length * stride + + else: + if padding == 'same': + pad = filter_size // 2 + elif padding == 'valid': + pad = 0 + elif padding == 'full': + pad = filter_size - 1 + + length = ((input_length - 1) * stride + filter_size - 2 * pad + + output_padding) + return length def normalize_data_format(value): diff --git a/tensorflow/python/keras/utils/multi_gpu_utils.py b/tensorflow/python/keras/utils/multi_gpu_utils.py index e1c49bc852..04b2ea8fe3 100644 --- a/tensorflow/python/keras/utils/multi_gpu_utils.py +++ b/tensorflow/python/keras/utils/multi_gpu_utils.py @@ -244,9 +244,24 @@ def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False): for o in range(len(outputs)): all_outputs[o].append(outputs[o]) + # Deduplicate output names to handle Siamese networks. + occurrences = {} + for n in model.output_names: + if n not in occurrences: + occurrences[n] = 1 + else: + occurrences[n] += 1 + conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1} + output_names = [] + for n in model.output_names: + if n in conflict_counter: + conflict_counter[n] += 1 + n += '_%d' % conflict_counter[n] + output_names.append(n) + # Merge outputs under expected scope. 
with ops.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]): merged = [] - for name, outputs in zip(model.output_names, all_outputs): + for name, outputs in zip(output_names, all_outputs): merged.append(concatenate(outputs, axis=0, name=name)) return Model(model.inputs, merged) diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py index 3d0351a11f..1780ab6587 100644 --- a/tensorflow/python/keras/utils/multi_gpu_utils_test.py +++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py @@ -198,5 +198,31 @@ class TestMultiGPUModel(test.TestCase): parallel_model.compile(loss='mean_squared_error', optimizer='adam') parallel_model.train_on_batch(x, y) + def test_multi_gpu_with_siamese_network(self): + gpus = 2 + + if not check_if_compatible_devices(gpus=gpus): + return + + with self.cached_session(): + input_shape = (3,) + nested_model = keras.models.Sequential([ + keras.layers.Dense(32, input_shape=input_shape), + keras.layers.Dense(1) + ], name='nested') + + input1 = keras.Input(input_shape) + input2 = keras.Input(input_shape) + score1 = nested_model(input1) + score2 = nested_model(input2) + score_sum = keras.layers.Add(name='add')([score1, score2]) + + siamese = keras.models.Model(inputs=[input1, input2], + outputs=[score_sum, score1, score2], + name='siamese') + parallel_siamese = keras.utils.multi_gpu_model(siamese, gpus) + self.assertEqual(parallel_siamese.output_names, + ['add', 'nested_1', 'nested_2']) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/utils/np_utils.py b/tensorflow/python/keras/utils/np_utils.py index c24e87308b..3763999bff 100644 --- a/tensorflow/python/keras/utils/np_utils.py +++ b/tensorflow/python/keras/utils/np_utils.py @@ -22,7 +22,7 @@ from tensorflow.python.util.tf_export import tf_export @tf_export('keras.utils.to_categorical') -def to_categorical(y, num_classes=None): +def to_categorical(y, num_classes=None, dtype='float32'): 
"""Converts a class vector (integers) to binary class matrix. E.g. for use with categorical_crossentropy. @@ -31,6 +31,7 @@ def to_categorical(y, num_classes=None): y: class vector to be converted into a matrix (integers from 0 to num_classes). num_classes: total number of classes. + dtype: The data type expected by the input. Default: `'float32'`. Returns: A binary matrix representation of the input. The classes axis is placed @@ -44,7 +45,7 @@ def to_categorical(y, num_classes=None): if not num_classes: num_classes = np.max(y) + 1 n = y.shape[0] - categorical = np.zeros((n, num_classes), dtype=np.float32) + categorical = np.zeros((n, num_classes), dtype=dtype) categorical[np.arange(n), y] = 1 output_shape = input_shape + (num_classes,) categorical = np.reshape(categorical, output_shape) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt index 2e9de9ebb2..eb315e356d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt @@ -8,6 +8,10 @@ tf_module { name: "elu" argspec: "args=[\'x\', \'alpha\'], varargs=None, keywords=None, defaults=[\'1.0\'], " } + member_method { + name: "exponential" + argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get" argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt index a71a59e269..9feb7c09b8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt @@ -46,7 +46,7 @@ tf_module { } member_method { name: "batch_normalization" - argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'0.001\'], " + 
argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'axis\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'-1\', \'0.001\'], " } member_method { name: "batch_set_value" @@ -98,7 +98,7 @@ tf_module { } member_method { name: "conv2d_transpose" - argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\'], " + argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\', \'(1, 1)\'], " } member_method { name: "conv3d" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt index c3dd2ad046..014f5828fa 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt index c440604aae..a6e4856de9 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', 
\'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 065bb4d35b..381839d6de 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -84,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index 
c7ba6056f9..2933f9f4b3 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -84,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 8f4f7918ab..9c9c7461c8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -84,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', 
\'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index 93c442bd55..44ca598724 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -84,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', 
\'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index 5ea61d118d..a8094c0bde 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], " } member_method { name: "add_loss" @@ -111,7 +111,7 @@ tf_class { } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_mask" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index 11dca17c6d..3ebe162f57 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], " } member_method { name: "add_loss" @@ -111,7 +111,7 @@ 
tf_class { } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_mask" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index 278429af6f..c0a53b847b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index 935a69ab2f..ff6c6f3ec4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt index 238d96cca6..d26da270e7 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -83,7 +83,7 @@ 
tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt index 4a45bf7997..524c5fd69e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt index 81b91d2780..138d97b11f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt @@ -70,6 +70,6 @@ tf_module { } member_method { name: "to_categorical" - argspec: "args=[\'y\', \'num_classes\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], " } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt index 
2e9de9ebb2..eb315e356d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt @@ -8,6 +8,10 @@ tf_module { name: "elu" argspec: "args=[\'x\', \'alpha\'], varargs=None, keywords=None, defaults=[\'1.0\'], " } + member_method { + name: "exponential" + argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get" argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt index a71a59e269..9feb7c09b8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt @@ -46,7 +46,7 @@ tf_module { } member_method { name: "batch_normalization" - argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'0.001\'], " + argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'axis\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'-1\', \'0.001\'], " } member_method { name: "batch_set_value" @@ -98,7 +98,7 @@ tf_module { } member_method { name: "conv2d_transpose" - argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\'], " + argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\', \'(1, 1)\'], " } member_method { name: "conv3d" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt index c3dd2ad046..014f5828fa 100644 --- 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt index c440604aae..a6e4856de9 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 065bb4d35b..381839d6de 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -84,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', 
\'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index c7ba6056f9..2933f9f4b3 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -84,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', 
\'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 8f4f7918ab..9c9c7461c8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -84,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index 93c442bd55..44ca598724 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -84,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index 5ea61d118d..a8094c0bde 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], 
varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], " } member_method { name: "add_loss" @@ -111,7 +111,7 @@ tf_class { } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_mask" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index 11dca17c6d..3ebe162f57 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], " } member_method { name: "add_loss" @@ -111,7 +111,7 @@ tf_class { } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_mask" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index 278429af6f..c0a53b847b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', 
\'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index 935a69ab2f..ff6c6f3ec4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt index 238d96cca6..d26da270e7 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -83,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt index 4a45bf7997..524c5fd69e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -83,7 +83,7 @@ 
tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt index 81b91d2780..138d97b11f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt @@ -70,6 +70,6 @@ tf_module { } member_method { name: "to_categorical" - argspec: "args=[\'y\', \'num_classes\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], " } } -- GitLab From d1588d72a820423cab36977ca97221aba01be713 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Mon, 8 Oct 2018 10:43:03 -0700 Subject: [PATCH 066/411] Add a utility that allows finding a name for an entity, relative to an existing namespace. PiperOrigin-RevId: 216211286 --- .../python/autograph/pyct/inspect_utils.py | 34 +++++++++++++++++++ .../autograph/pyct/inspect_utils_test.py | 19 +++++++++++ 2 files changed, 53 insertions(+) diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py index 1416988ea3..29c406c248 100644 --- a/tensorflow/python/autograph/pyct/inspect_utils.py +++ b/tensorflow/python/autograph/pyct/inspect_utils.py @@ -67,6 +67,40 @@ def getnamespace(f): return namespace +def getqualifiedname(namespace, object_, max_depth=2): + """Returns the name by which a value can be referred to in a given namespace. 
+ + This function will recurse inside modules, but it will not search objects for + attributes. The recursion depth is controlled by max_depth. + + Args: + namespace: Dict[str, Any], the namespace to search into. + object_: Any, the value to search. + max_depth: Optional[int], a limit to the recursion depth when searching + inside modules. + Returns: Union[str, None], the fully-qualified name that resolves to the value + o, or None if it couldn't be found. + """ + for name, value in namespace.items(): + # The value may be referenced by more than one symbol, case in which + # any symbol will be fine. If the program contains symbol aliases that + # change over time, this may capture a symbol that will later point to + # something else. + # TODO(mdan): Prefer the symbol that matches the value type name. + if object_ is value: + return name + + # TODO(mdan): Use breadth-first search and avoid visiting modules twice. + if max_depth: + for name, value in namespace.items(): + if tf_inspect.ismodule(value): + name_in_module = getqualifiedname(value.__dict__, object_, + max_depth - 1) + if name_in_module is not None: + return '{}.{}'.format(name, name_in_module) + return None + + def _get_unbound_function(m): # TODO(mdan): Figure out why six.get_unbound_function fails in some cases. # The failure case is for tf.keras.Model. 
diff --git a/tensorflow/python/autograph/pyct/inspect_utils_test.py b/tensorflow/python/autograph/pyct/inspect_utils_test.py index f3eb027822..11074debfc 100644 --- a/tensorflow/python/autograph/pyct/inspect_utils_test.py +++ b/tensorflow/python/autograph/pyct/inspect_utils_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from functools import wraps +import imp import six @@ -127,6 +128,24 @@ class InspectUtilsTest(test.TestCase): self.assertEqual(ns['closed_over_primitive'], closed_over_primitive) self.assertTrue('local_var' not in ns) + def test_getqualifiedname(self): + foo = object() + qux = imp.new_module('quxmodule') + bar = imp.new_module('barmodule') + baz = object() + bar.baz = baz + + ns = { + 'foo': foo, + 'bar': bar, + 'qux': qux, + } + + self.assertIsNone(inspect_utils.getqualifiedname(ns, inspect_utils)) + self.assertEqual(inspect_utils.getqualifiedname(ns, foo), 'foo') + self.assertEqual(inspect_utils.getqualifiedname(ns, bar), 'bar') + self.assertEqual(inspect_utils.getqualifiedname(ns, baz), 'bar.baz') + def test_getmethodclass(self): self.assertEqual( -- GitLab From 0691d49fb6e15740b8ddf8019fea4edb91bca914 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 10:43:54 -0700 Subject: [PATCH 067/411] Convert TensorFlow's nasm dependency to new third party import method. 
PiperOrigin-RevId: 216211467 --- tensorflow/workspace.bzl | 15 ++------------- third_party/nasm/BUILD | 1 + third_party/{nasm.BUILD => nasm/BUILD.bazel} | 12 ++++++------ .../nasm.BUILD => nasm/BUILD.system} | 0 third_party/nasm/workspace.bzl | 17 +++++++++++++++++ 5 files changed, 26 insertions(+), 19 deletions(-) create mode 100644 third_party/nasm/BUILD rename third_party/{nasm.BUILD => nasm/BUILD.bazel} (100%) rename third_party/{systemlibs/nasm.BUILD => nasm/BUILD.system} (100%) create mode 100644 third_party/nasm/workspace.bzl diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 6f5aa85b01..adeac62e43 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -23,11 +23,13 @@ load( load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") load("//third_party/icu:workspace.bzl", icu = "repo") load("//third_party/jpeg:workspace.bzl", jpeg = "repo") +load("//third_party/nasm:workspace.bzl", nasm = "repo") def initialize_third_party(): flatbuffers() icu() jpeg() + nasm() # Sanitize a dependency so that it works correctly from code that includes # TensorFlow as a submodule. 
@@ -235,19 +237,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): ], ) - tf_http_archive( - name = "nasm", - build_file = clean_dep("//third_party:nasm.BUILD"), - sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011", - strip_prefix = "nasm-2.13.03", - system_build_file = clean_dep("//third_party/systemlibs:nasm.BUILD"), - urls = [ - "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2", - "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2", - "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2", - ], - ) - tf_http_archive( name = "png_archive", build_file = clean_dep("//third_party:png.BUILD"), diff --git a/third_party/nasm/BUILD b/third_party/nasm/BUILD new file mode 100644 index 0000000000..e3aec1fce9 --- /dev/null +++ b/third_party/nasm/BUILD @@ -0,0 +1 @@ +# Needed to make this a package. 
diff --git a/third_party/nasm.BUILD b/third_party/nasm/BUILD.bazel similarity index 100% rename from third_party/nasm.BUILD rename to third_party/nasm/BUILD.bazel index d746a65e7e..c68d713946 100644 --- a/third_party/nasm.BUILD +++ b/third_party/nasm/BUILD.bazel @@ -137,12 +137,6 @@ cc_binary( ":windows": ["config/msvc.h"], "//conditions:default": [], }), - includes = [ - "asm", - "include", - "output", - "x86", - ], copts = select({ ":windows": [], "//conditions:default": [ @@ -157,6 +151,12 @@ cc_binary( "HAVE_SYS_TYPES_H", ], }), + includes = [ + "asm", + "include", + "output", + "x86", + ], visibility = ["@jpeg//:__pkg__"], ) diff --git a/third_party/systemlibs/nasm.BUILD b/third_party/nasm/BUILD.system similarity index 100% rename from third_party/systemlibs/nasm.BUILD rename to third_party/nasm/BUILD.system diff --git a/third_party/nasm/workspace.bzl b/third_party/nasm/workspace.bzl new file mode 100644 index 0000000000..6d50f6fcad --- /dev/null +++ b/third_party/nasm/workspace.bzl @@ -0,0 +1,17 @@ +"""loads the nasm library, used by TF.""" + +load("//third_party:repo.bzl", "third_party_http_archive") + +def repo(): + third_party_http_archive( + name = "nasm", + urls = [ + "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2", + "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2", + "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2", + ], + sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011", + strip_prefix = "nasm-2.13.03", + build_file = "//third_party/nasm:BUILD.bazel", + system_build_file = "//third_party/nasm:BUILD.system", + ) -- GitLab From 3f0155133d668cf6cee1f1fb362d2a75c04836e3 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Mon, 8 Oct 2018 10:52:15 -0700 Subject: [PATCH 068/411] Fix support 
for a single tensor to be passed to target_tensors PiperOrigin-RevId: 216212953 --- tensorflow/python/keras/engine/training.py | 6 ++++-- tensorflow/python/keras/engine/training_distributed.py | 4 ---- tensorflow/python/keras/engine/training_test.py | 4 ++++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 2ebb4cf99f..ff2ae54ad4 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -563,9 +563,11 @@ class Model(Network): for name in self.output_names: tmp_target_tensors.append(target_tensors.get(name, None)) target_tensors = tmp_target_tensors + elif tensor_util.is_tensor(target_tensors): + target_tensors = [target_tensors] else: - raise TypeError('Expected `target_tensors` to be ' - 'a list or dict, but got:', target_tensors) + raise TypeError('Expected `target_tensors` to be a list or tuple or ' + 'dict or a single tensor, but got:', target_tensors) for i in range(len(self.outputs)): if i in skip_target_indices: diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py index 04e8d079c0..ac759ef3aa 100644 --- a/tensorflow/python/keras/engine/training_distributed.py +++ b/tensorflow/python/keras/engine/training_distributed.py @@ -820,10 +820,6 @@ def _clone_and_build_model(model, inputs=None, targets=None): optimizer_config = model.optimizer.get_config() optimizer = model.optimizer.__class__.from_config(optimizer_config) - # TODO(priyag): Is there a cleaner way to do this? The API doc suggests a - # single tensor should be OK but it throws an error in that case. 
- if targets is not None and not isinstance(targets, (list, dict, tuple)): - targets = [targets] if isinstance(targets, tuple): targets = nest.flatten(targets) cloned_model.compile( diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 54ad74c08b..868fd1dc69 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1865,6 +1865,10 @@ class TestTrainingWithDataTensors(test.TestCase): model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target]) model.train_on_batch(input_val, None) + # single-output, as single tensor + model.compile(optimizer='rmsprop', loss='mse', target_tensors=target) + model.train_on_batch(input_val, None) + # single-output, as dict model.compile(optimizer='rmsprop', loss='mse', target_tensors={'dense': target}) -- GitLab From 7d92890cb215f2f563fac96f1e3bde712a8749f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 11:18:12 -0700 Subject: [PATCH 069/411] Update ops-related pbtxt files. 
PiperOrigin-RevId: 216217887 --- .../core/ops/compat/ops_history.v1.pbtxt | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 0753316724..9df0ece69b 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -28980,6 +28980,74 @@ op { minimum: 1 } } +op { + name: "LeakyRelu" + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "activations" + type_attr: "T" + } + attr { + name: "alpha" + type: "float" + default_value { + f: 0.2 + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} +op { + name: "LeakyReluGrad" + input_arg { + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "backprops" + type_attr: "T" + } + attr { + name: "alpha" + type: "float" + default_value { + f: 0.2 + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} op { name: "LearnedUnigramCandidateSampler" input_arg { -- GitLab From 1221a8e38a402513560ee71e6982b7cd8b6d901b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 11:54:12 -0700 Subject: [PATCH 070/411] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 216224026 --- tensorflow/go/op/wrappers.go | 228 +++++++++++++++++------------------ 1 file changed, 114 insertions(+), 114 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 5d17605e37..fe99915a6c 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -7221,6 +7221,45 @@ func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf. 
return components } +// Deprecated. Use TensorArrayGradV3 +// +// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3 +func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorArrayWriteV2", + Input: []tf.Input{ + handle, index, value, flow_in, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Writes the given dataset to the given file using the TFRecord format. +// +// Arguments: +// input_dataset: A variant tensor representing the dataset to write. +// filename: A scalar string tensor representing the filename to use. +// compression_type: A scalar string tensor containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// +// Returns the created operation. +func DatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DatasetToTFRecord", + Input: []tf.Input{ + input_dataset, filename, compression_type, + }, + } + return scope.AddOperation(opspec) +} + // Computes rectified linear 6: `min(max(features, 0), 6)`. func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { @@ -8251,44 +8290,6 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt return op.Output(0) } -// Bucketizes 'input' based on 'boundaries'. -// -// For example, if the inputs are -// boundaries = [0, 10, 100] -// input = [[-5, 10000] -// [150, 10] -// [5, 100]] -// -// then the output will be -// output = [[0, 3] -// [3, 2] -// [1, 3]] -// -// Arguments: -// input: Any shape of Tensor contains with int or float type. -// boundaries: A sorted list of floats gives the boundary of the buckets. -// -// Returns Same shape with 'input', each value of input replaced with bucket index. 
-// -// @compatibility(numpy) -// Equivalent to np.digitize. -// @end_compatibility -func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"boundaries": boundaries} - opspec := tf.OpSpec{ - Type: "Bucketize", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2. type FusedBatchNormV2Attr func(optionalAttr) @@ -10980,6 +10981,44 @@ func Tan(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// Bucketizes 'input' based on 'boundaries'. +// +// For example, if the inputs are +// boundaries = [0, 10, 100] +// input = [[-5, 10000] +// [150, 10] +// [5, 100]] +// +// then the output will be +// output = [[0, 3] +// [3, 2] +// [1, 3]] +// +// Arguments: +// input: Any shape of Tensor contains with int or float type. +// boundaries: A sorted list of floats gives the boundary of the buckets. +// +// Returns Same shape with 'input', each value of input replaced with bucket index. +// +// @compatibility(numpy) +// Equivalent to np.digitize. +// @end_compatibility +func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"boundaries": boundaries} + opspec := tf.OpSpec{ + Type: "Bucketize", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // EncodeJpegAttr is an optional argument to EncodeJpeg. type EncodeJpegAttr func(optionalAttr) @@ -21413,43 +21452,6 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min return op.Output(0), op.Output(1), op.Output(2) } -// Computes the minimum along segments of a tensor. 
-// -// Read -// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such -// that `segment_ids[j] == i`. -// -// If the min is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentMin", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // SdcaOptimizerAttr is an optional argument to SdcaOptimizer. type SdcaOptimizerAttr func(optionalAttr) @@ -21924,6 +21926,43 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp return op.Output(0), op.Output(1), op.Output(2) } +// Computes the minimum along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such +// that `segment_ids[j] == i`. +// +// If the min is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMin", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the sum along segments of a tensor. // // Read @@ -29878,28 +29917,6 @@ func Cross(scope *Scope, a tf.Output, b tf.Output) (product tf.Output) { return op.Output(0) } -// Writes the given dataset to the given file using the TFRecord format. -// -// Arguments: -// input_dataset: A variant tensor representing the dataset to write. -// filename: A scalar string tensor representing the filename to use. -// compression_type: A scalar string tensor containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// -// Returns the created operation. -func DatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DatasetToTFRecord", - Input: []tf.Input{ - input_dataset, filename, compression_type, - }, - } - return scope.AddOperation(opspec) -} - // AvgPool3DAttr is an optional argument to AvgPool3D. type AvgPool3DAttr func(optionalAttr) @@ -31692,23 +31709,6 @@ func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size return op.Output(0) } -// Deprecated. 
Use TensorArrayGradV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3 -func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayWriteV2", - Input: []tf.Input{ - handle, index, value, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // SparseReduceMaxAttr is an optional argument to SparseReduceMax. type SparseReduceMaxAttr func(optionalAttr) -- GitLab From 723fd1245ed650ad07e5049faec021f4f0f6d408 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Mon, 8 Oct 2018 12:03:09 -0700 Subject: [PATCH 071/411] Fix the steps_per_epoch when training on mnist PiperOrigin-RevId: 216225505 --- tensorflow/contrib/distribute/python/examples/keras_mnist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/distribute/python/examples/keras_mnist.py b/tensorflow/contrib/distribute/python/examples/keras_mnist.py index a84ef04196..da7f8c548f 100644 --- a/tensorflow/contrib/distribute/python/examples/keras_mnist.py +++ b/tensorflow/contrib/distribute/python/examples/keras_mnist.py @@ -113,7 +113,7 @@ def main(_): distribute=strategy) # Train the model with the train dataset. - model.fit(x=train_ds, epochs=20, steps_per_epoch=310) + model.fit(x=train_ds, epochs=20, steps_per_epoch=468) # Evaluate the model with the eval dataset. score = model.evaluate(eval_ds, steps=10, verbose=0) -- GitLab From dcd3b4307a3095e3f18aef53f5034787e3cc3af6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 12:09:54 -0700 Subject: [PATCH 072/411] Remove the restrictions that constant resolution of reduce_sum operators must be on axis 0, and can only be on 1 or 2-d inputs. 
PiperOrigin-RevId: 216226776 --- .../resolve_constant_unary.cc | 93 +++++++++--- .../toco/graph_transformations/tests/BUILD | 13 ++ .../tests/resolve_constant_unary_test.cc | 140 ++++++++++++++++++ 3 files changed, 229 insertions(+), 17 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index c698a9567a..5364eebbc9 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -27,6 +27,73 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" namespace toco { +namespace { + +// Using the function reducer, reduce input along all axes in axes. +// Put the reduced data in output, which should aleady be appropriately sized. +// check_output_shape is set to what this code computes the final shape +// to be, so it can be cross checked with the shape computation logic. +void ReduceGeneric(bool keep_dims, const std::vector& axes, + const Shape& input_shape, const std::vector& input, + Shape* check_output_shape, std::vector* output, + const std::function& reducer) { + if (!IsNonEmpty(input_shape)) { + // Zero-dimensions will break the NextIndices() logic, so just early out if + // we have an empty shape. + return; + } + + // Set up output_shape to be the same length as input_shape, with + // appropriate dimensions squashed to 1. If keep_dims is false, we'll strip + // out the one dimensions at the end, but it's convenient to leave them for + // now. We recompute the shape because we need the output shape to have + // 1-dims in all the squashed dimensions; the shape from shape computation may + // remove those squashed dimensions, depending on the options used. 
+ Shape output_shape = input_shape; + + // Reduction mask will be elementwise multiplied against the input + // indices to figure out the output index for the element. + std::vector reduction_mask(input_shape.dimensions_count(), 1); + for (int axis : axes) { + CHECK_GE(axis, 0); + CHECK_LT(axis, input_shape.dimensions_count()); + reduction_mask[axis] = 0; + output_shape.mutable_dims()->at(axis) = 1; + } + + std::vector output_indices(input_shape.dimensions_count()); + for (int input_offset = 0; input_offset < input.size(); ++input_offset) { + std::vector input_indices = ReverseOffset(input_shape, input_offset); + // Calculate the output location by squashing input indices to 0 + // in reduced axes. + for (int i = 0; i < input_shape.dimensions_count(); ++i) { + output_indices[i] = input_indices[i] * reduction_mask[i]; + } + int output_offset = Offset(output_shape, output_indices); + if (input_indices == output_indices) { + // Base element for the reduced axes + output->at(output_offset) = input.at(input_offset); + } else { + // Reduce with existing element. + output->at(output_offset) = + reducer(output->at(output_offset), input.at(input_offset)); + } + } + + if (!keep_dims) { + // Strip out the dims from output_shape. 
+ std::vector new_dims; + for (int i = 0; i < output_shape.dimensions_count(); ++i) { + if (reduction_mask[i]) { + new_dims.push_back(output_shape.dims(i)); + } + } + output_shape.mutable_dims()->swap(new_dims); + } + *check_output_shape = output_shape; +} + +} // namespace bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) { auto& output_array = model->GetArray(op.outputs[0]); @@ -176,27 +243,19 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { } auto& axis_array = model->GetArray(unary_op->inputs[1]); CHECK(axis_array.data_type == ArrayDataType::kInt32); - int axis = axis_array.GetBuffer().data[0]; - CHECK_LT(axis, input_shape.dimensions_count()) << "Axis out of bounds"; - // We currently only handle reduction on axis 0. - CHECK_EQ(axis, 0) << "Only reduction along axis 0 is supported"; - // We currently only handle 1-D and 2-D input tensors. - CHECK_LE(input_shape.dimensions_count(), 2) << "Rank >2 not yet supported"; // We only support keep_dims=true; shape prop will need to change otherwise. auto sum_op = static_cast(unary_op); - CHECK(sum_op->keep_dims) << "Only keep_dims=true is supported"; + Shape check_output_shape; - std::vector indices(input_shape.dimensions_count()); - for (int i = 0; i < input_shape.dims(1); ++i) { - indices[1] = i; - float sum = 0.f; - for (int j = 0; j < input_shape.dims(0); ++j) { - indices[0] = j; - sum += (*input_float_data)[Offset(input_shape, indices)]; - } - output_float_data[i] = sum; - } + ReduceGeneric( + sum_op->keep_dims, axis_array.GetBuffer().data, + input_shape, *input_float_data, &check_output_shape, &output_float_data, + [](float existing, float current) -> float { + return existing + current; + }); + CHECK(check_output_shape == output_shape) + << "Shape propagation output shape doesn't match output shape from op"; } else if (unary_op->type == OperatorType::kReduceMin) { // At the moment only full reduction across all dimensions is supported. 
// TODO(starka): Output should not be padded. diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD index acf1e3ede5..6f1be298ca 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD @@ -30,3 +30,16 @@ tf_cc_test( "@com_google_googletest//:gtest_main", ], ) + +tf_cc_test( + name = "resolve_constant_unary_test", + srcs = ["resolve_constant_unary_test.cc"], + tags = ["no_oss"], + deps = [ + "//tensorflow/contrib/lite/toco:graph_transformations", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/contrib/lite/toco:tooling_util", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc new file mode 100644 index 0000000000..a53abc9941 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc @@ -0,0 +1,140 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include + +#include +#include "absl/memory/memory.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +namespace { + +void RunResolveSum(const std::vector& input, + const std::vector& input_shape, + const std::vector& axis, + const std::vector& output_shape, + const std::vector& expected_output) { + Model model; + Array& input0 = model.GetOrCreateArray("input0"); + Array& input1 = model.GetOrCreateArray("input1"); + Array& output = model.GetOrCreateArray("output"); + + *input0.mutable_shape()->mutable_dims() = input_shape; + input0.data_type = ArrayDataType::kFloat; + input0.GetMutableBuffer().data = input; + + *input1.mutable_shape()->mutable_dims() = {static_cast(axis.size())}; + input1.GetMutableBuffer().data = axis; + input1.data_type = ArrayDataType::kInt32; + + *output.mutable_shape()->mutable_dims() = output_shape; + + auto sum_op = absl::make_unique(); + sum_op->keep_dims = true; + sum_op->inputs = {"input0", "input1"}; + sum_op->outputs = {"output"}; + model.operators.push_back(std::move(sum_op)); + ResolveConstantUnaryOperator().Run(&model, 0); + EXPECT_EQ(model.GetArray("output").GetBuffer().data, + expected_output); + EXPECT_EQ(model.GetArray("output").shape().dims(), output_shape); +} + +// Reduce a 2d array across axis 0 +TEST(ResolveConstantUnary, ResolveSumAxis0_2D) { + // clang-format off + RunResolveSum( + // Input data + {3, 1, 4, 1, + 5, 9, 2, 6, + 5, 3, 5, 8}, + + // Input shape + {3, 4}, + + // Axes + {0}, + + // Expected output shape, + {1, 4}, + + // Expected output + {13, 13, 11, 15}); + // clang-format on +} + +// Reduce a 2d array across axis 1 +TEST(ResolveConstantUnary, ResolveSumAxis1_2D) { + // clang-format off + RunResolveSum( + // Input data + {3, 1, 4, 1, + 5, 9, 2, 6, + 5, 3, 
5, 8}, + + // Input shape + {3, 4}, + + // Axes + {1}, + + // Expected output shape, + {3, 1}, + + // Expected output + {9, 22, 21}); + // clang-format on +} + +// Reduce a 3d tensor across axes 0 and 2. +TEST(ResolveConstantUnary, ResolveSumAxis0_2_3D) { + // clang-format off + RunResolveSum( + // Input data + { 0, 1, 2, + 3, 10, 11, + 12, 13, 20, + 21, 22, 23, + + 100, 101, 102, + 103, 110, 111, + 112, 113, 120, + 121, 122, 123, + + 200, 201, 202, + 203, 210, 211, + 212, 213, 220, + 221, 222, 223 }, + + // Input shape + {3, 4, 3}, + + // Axes + {0, 2}, + + // Expected output shape, + {1, 4, 1}, + + // Expected output, generated using octave. + { 909, 972, 1035, 1098}); + // clang-format on +} + +} // namespace +} // namespace toco -- GitLab From d3595b1534a855f3d0da35d3f1dd8b5d464b1b70 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 12:34:00 -0700 Subject: [PATCH 073/411] Fix a couple of reference leaks PiperOrigin-RevId: 216230391 --- tensorflow/python/pywrap_tfe.i | 1 + tensorflow/python/util/util.cc | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 61e0abbfcb..adbce95c6f 100755 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -209,6 +209,7 @@ limitations under the License. 
SWIG_fail; } else { int num_outputs = $1->size(); + Py_CLEAR($result); $result = PyList_New(num_outputs); for (int i = 0; i < num_outputs; ++i) { PyObject *output; diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index 7b3e618e84..11eb9ce947 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -825,18 +825,16 @@ PyObject* IsNamedtuple(PyObject* o, bool strict) { } PyObject* SameNamedtuples(PyObject* o1, PyObject* o2) { - PyObject* f1 = PyObject_GetAttrString(o1, "_fields"); - PyObject* f2 = PyObject_GetAttrString(o2, "_fields"); + Safe_PyObjectPtr f1 = make_safe(PyObject_GetAttrString(o1, "_fields")); + Safe_PyObjectPtr f2 = make_safe(PyObject_GetAttrString(o2, "_fields")); if (f1 == nullptr || f2 == nullptr) { - Py_XDECREF(f1); - Py_XDECREF(f2); PyErr_SetString( PyExc_RuntimeError, "Expected namedtuple-like objects (that have _fields attr)"); return nullptr; } - if (PyObject_RichCompareBool(f1, f2, Py_NE)) { + if (PyObject_RichCompareBool(f1.get(), f2.get(), Py_NE)) { Py_RETURN_FALSE; } -- GitLab From 9b558126e31d25ec4e82cb4f50033d6eca44349a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 12:58:29 -0700 Subject: [PATCH 074/411] Add timeout mechanism to Grappler meta optimizer. This is only a best-effort mechanism, since the meta optimizer only checks if it has been cancelled before running each sub-optimizer. We can add cancellation to each sub-optimizer if necessary. 
PiperOrigin-RevId: 216234262 --- .../grappler/optimizers/graph_optimizer.h | 21 ++++++ .../grappler/optimizers/meta_optimizer.cc | 68 ++++++++++++++++++- .../core/grappler/optimizers/meta_optimizer.h | 15 +++- .../optimizers/meta_optimizer_test.cc | 62 +++++++++++++++++ .../core/protobuf/rewriter_config.proto | 4 ++ 5 files changed, 165 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer.h b/tensorflow/core/grappler/optimizers/graph_optimizer.h index 765dd13263..bd6bf9f860 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer.h +++ b/tensorflow/core/grappler/optimizers/graph_optimizer.h @@ -16,8 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_ #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_ +#include #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" namespace tensorflow { namespace grappler { @@ -29,6 +32,7 @@ struct GrapplerItem; // optimization of a GrapplerItem for running on a cluster. class GraphOptimizer { public: + GraphOptimizer() : is_cancelled_(false) {} virtual ~GraphOptimizer() {} virtual string name() const = 0; @@ -45,8 +49,25 @@ class GraphOptimizer { // call to Optimize) performed. Lower "result" scores are better. virtual void Feedback(Cluster* cluster, const GrapplerItem& item, const GraphDef& optimized_graph, double result) = 0; + + // Best effort cancellation. Sets is_cancelled to true and requests that the + // optimizer returns as soon as possible from active calls to Optimize() or + // FeedBack(). 
+ void Cancel() { is_cancelled_ = true; } + + bool is_cancelled() const { return is_cancelled_; } + + private: + std::atomic is_cancelled_; }; +#define GRAPPLER_RETURN_IF_CANCELLED() \ + do { \ + if (is_cancelled()) { \ + return errors::DeadlineExceeded(this->name(), " was cancelled."); \ + } \ + } while (0) + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 3f33b16ba8..7488cedec5 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -14,6 +14,9 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/optimizers/meta_optimizer.h" + +#include + #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/versions.pb.h" @@ -37,7 +40,11 @@ limitations under the License. 
#include "tensorflow/core/grappler/utils/functions.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/notification.h" +#include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/util/ptr_util.h" namespace tensorflow { @@ -115,6 +122,21 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer( #undef MK_OPT +MetaOptimizer::MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) + : cpu_device_(cpu_device), cfg_(cfg) { + // TODO(rmlarsen): Increase kNumThreads to, say, port::NumSchedulableCPUs() + // if we want to use the threadpool for parallelizing Grappler. + const int kNumThreads = 1; + thread_pool_ = absl::make_unique<thread::ThreadPool>( + Env::Default(), "MetaOptimizerThreadPool", kNumThreads); +} + +MetaOptimizer::~MetaOptimizer() { + // The ThreadPool destructor waits for threads to finish, so we don't + // pull the rug out from under them. + thread_pool_.reset(); +} + Status MetaOptimizer::InitializeOptimizers( std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const { if (cfg_.disable_meta_optimizer()) { @@ -310,6 +332,7 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, VLOG(4) << "Starting optimization iteration " << iteration; for (const auto& optimizer : optimizers) { + GRAPPLER_RETURN_IF_CANCELLED(); // Some optimizers can run only once. if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue; // Some must run only on the last iteration. @@ -368,6 +391,7 @@ Status MetaOptimizer::RunOptimizer( // resets optimized_graph to an empty graph. optimized_graph->Swap(&optimized_item->graph); *optimized_graph = GraphDef(); + // TODO(rmlarsen): Add timeout for individual optimizers. 
Status status = optimizer->Optimize(cluster, *optimized_item, optimized_graph); uint64 end_us = Env::Default()->NowMicros(); @@ -389,14 +413,15 @@ Status MetaOptimizer::RunOptimizer( return status; } -Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph) { +Status MetaOptimizer::OptimizeMainGraphAndFunctionLibrary( + Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { VLOG(1) << "Starting optimization for grappler item: " << item.id; optimization_results_.clear(); // 1. Optimize main graph TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph)); VLOG(1) << "Optimized main graph."; + GRAPPLER_RETURN_IF_CANCELLED(); // Skip optimizing functions if this is a TPU graph. Currently, Grappler // passes do not handle TPU functions correctly in a variety of ways (Note @@ -432,6 +457,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimize_function_library = false; for (const FunctionDef& func : optimized_graph->library().function()) { + GRAPPLER_RETURN_IF_CANCELLED(); + const string& func_name = func.signature().name(); // Skip already optimized functions. @@ -506,6 +533,43 @@ void MetaOptimizer::PrintResult() { } } +Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + const int64 kFiveMinutesInUsec = 5 * 60 * 1000 * 1000; + const int64 timeout_usec = (cfg_.meta_optimizer_timeout_ms() == 0 + ? 
kFiveMinutesInUsec + : cfg_.meta_optimizer_timeout_ms() * 1000); + if (timeout_usec < 0) { + return OptimizeMainGraphAndFunctionLibrary(cluster, item, optimized_graph); + } + + GraphDef optimized_with_timeout; + Status status; + Notification done; + thread_pool_->Schedule( + [this, cluster, &done, &optimized_with_timeout, &item, &status]() { + status = this->OptimizeMainGraphAndFunctionLibrary( + cluster, item, &optimized_with_timeout); + done.Notify(); + }); + + const bool notified = WaitForNotificationWithTimeout(&done, timeout_usec); + if (notified && status.ok()) { + optimized_graph->Swap(&optimized_with_timeout); + } else { + *optimized_graph = item.graph; + if (!notified) { + this->Cancel(); + done.WaitForNotification(); + status = errors::DeadlineExceeded( + "Grappler MetaOptimizer timed out after ", + static_cast(timeout_usec) / (1000 * 1000), " seconds"); + LOG(WARNING) << status.error_message(); + } + } + return status; +} + void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, const GraphDef& pruned_graph, double result) { // Nothing to do for MetaOptimizer. diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 99a0a33ffa..35d6a4559b 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -28,9 +29,8 @@ namespace grappler { // Run the other grappler optimizers based on the specified rewriter config. 
class MetaOptimizer : public GraphOptimizer { public: - MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) - : cpu_device_(cpu_device), cfg_(cfg) {} - ~MetaOptimizer() override = default; + MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg); + ~MetaOptimizer(); string name() const override { return "meta_optimizer"; }; @@ -65,9 +65,18 @@ class MetaOptimizer : public GraphOptimizer { Status OptimizeGraph(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph); + // Run optimization passes over the main graph and for functions in the + // function library. + Status OptimizeMainGraphAndFunctionLibrary(Cluster* cluster, + const GrapplerItem& item, + GraphDef* optimized_graph); + DeviceBase* const cpu_device_; // may be NULL RewriterConfig cfg_; + // Thread pool used for launching optimizers asynchronously. + std::unique_ptr thread_pool_; + struct OptimizerResult { string optimizer_name; string result; diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc index 3f3f43382f..7f1dd91f09 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc @@ -461,6 +461,68 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) { EXPECT_FALSE(allowed_optimizations_my_mul_2->non_differentiable_rewrites); } +class SleepingOptimizer : public CustomGraphOptimizer { + public: + SleepingOptimizer() {} + string name() const override { return "test_optimizer"; } + + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { + return Status::OK(); + } + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) override { + *optimized_graph = item.graph; + optimized_graph->add_node(); + sleep(1); + return Status::OK(); + } + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, 
double result) override {} +}; + +REGISTER_GRAPH_OPTIMIZER(SleepingOptimizer); + +TEST_F(MetaOptimizerTest, OptimizerTimesOut) { + TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + + RewriterConfig rewriter_config; + rewriter_config.add_optimizers("SleepingOptimizer"); + rewriter_config.set_min_graph_nodes(-1); + rewriter_config.set_meta_optimizer_timeout_ms(1500); + rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO); + + MetaOptimizer optimizer(nullptr, rewriter_config); + GraphDef output; + const Status status = optimizer.Optimize(nullptr, item, &output); + EXPECT_EQ(status.error_message(), + "Grappler MetaOptimizer timed out after 1.5 seconds"); + // Make sure the graph was reverted to the original regardless of when the + // optimizer timed out. + CompareGraphs(item.graph, output); +} + +TEST_F(MetaOptimizerTest, OptimizerDoesNotTimeOut) { + TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + + RewriterConfig rewriter_config; + rewriter_config.add_optimizers("SleepingOptimizer"); + rewriter_config.set_min_graph_nodes(-1); + rewriter_config.set_meta_optimizer_timeout_ms(1500); + rewriter_config.set_meta_optimizer_iterations(RewriterConfig::ONE); + MetaOptimizer optimizer(nullptr, rewriter_config); + GraphDef output; + const Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + EXPECT_EQ(item.graph.node_size() + 1, output.node_size()); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 8c31468ff5..7ccd54b818 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -83,6 +83,10 @@ message RewriterConfig { // Controls how many times we run the optimizers in meta optimizer 
(default // is once). NumIterationsType meta_optimizer_iterations = 12; + // Maximum number of milliseconds to spend optimizing a single graph before + // timing out. If equal to 0 the system picks a default (currently 5 minutes). + // If less than 0 the optimizer will never time out. + int64 meta_optimizer_timeout_ms = 20; // The minimum number of nodes in a graph to optimizer. For smaller graphs, // optimization is skipped. -- GitLab From 76ab96c8a5b2d77dfc191c94ff54fd5e52c561f2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 13:31:52 -0700 Subject: [PATCH 075/411] Changed Adam algorithm variant formula from sqrt(max(v, epsilon**2)) to sqrt(v + epsilon**2) and changed flag name accordingly. PiperOrigin-RevId: 216240045 --- tensorflow/contrib/tpu/proto/optimization_parameters.proto | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto index 8529b48c15..c2e3be03db 100644 --- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto +++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto @@ -62,9 +62,9 @@ message FtrlParameters { // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If // use_non_lazy_adam is enabled, use_gradient_accumulation is also required in // order to get correct results; a warning will be printed otherwise (which may -// change to an error in the future). If use_max_with_epsilon is set, the Adam +// change to an error in the future). If use_sum_inside_sqrt is set, the Adam // variable update formula will be changed from m / (sqrt(v) + epsilon) to -// m / max(sqrt(v), abs(epsilon)); this option improves the performance of TPU +// m / sqrt(v + epsilon**2); this option improves the performance of TPU // training and is not expected to harm model quality. 
message AdamParameters { float beta1 = 3; @@ -73,7 +73,7 @@ message AdamParameters { float initial_m = 6; float initial_v = 7; bool use_non_lazy_adam = 8; - bool use_max_with_epsilon = 9; + bool use_sum_inside_sqrt = 10; } // https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer -- GitLab From b052c51374f558c25a29c70918d79205dfec808b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 13:46:12 -0700 Subject: [PATCH 076/411] Add tf.BenchmarkConfig that returns a session config appropriate for benchmarking. At the moment, it returns a default config with only Grappler dependency optimizer disabled. Many benchmarks wrap the subgraph they want to time in control_flow_ops.group() to avoid including the overhead of copying the output back to the Python client in the measurement. In the graph, this only adds a control dependency between the subgraph output and the fetch node, which in turn (often) causes the dependency optimizer to turn all nodes in the graph into no-ops. 
PiperOrigin-RevId: 216242463 --- .../python/kernel_tests/benchmark_test.py | 2 +- .../python/kernel_tests/cholesky_op_test.py | 7 ++- .../kernel_tests/determinant_op_test.py | 9 +-- .../kernel_tests/matrix_band_part_op_test.py | 5 +- .../matrix_exponential_op_test.py | 5 +- .../kernel_tests/matrix_inverse_op_test.py | 5 +- .../kernel_tests/matrix_logarithm_op_test.py | 3 +- .../kernel_tests/matrix_solve_ls_op_test.py | 5 +- .../kernel_tests/matrix_solve_op_test.py | 5 +- .../sparse_tensors_map_ops_test.py | 3 +- .../python/kernel_tests/where_op_test.py | 5 +- tensorflow/python/ops/image_ops_test.py | 62 +++++++++---------- tensorflow/python/platform/benchmark.py | 14 +++++ .../tools/api/golden/v1/tensorflow.test.pbtxt | 4 ++ .../tools/api/golden/v2/tensorflow.test.pbtxt | 4 ++ 15 files changed, 84 insertions(+), 54 deletions(-) diff --git a/tensorflow/python/kernel_tests/benchmark_test.py b/tensorflow/python/kernel_tests/benchmark_test.py index 78b6e38d94..5777a5d097 100644 --- a/tensorflow/python/kernel_tests/benchmark_test.py +++ b/tensorflow/python/kernel_tests/benchmark_test.py @@ -64,7 +64,7 @@ class TestReportingBenchmark(test.Benchmark): "other_key": "string"}) def benchmark_times_an_op(self): - with session.Session() as sess: + with session.Session(config=benchmark.benchmark_config()) as sess: a = constant_op.constant(0.0) a_plus_a = a + a return self.run_op_benchmark( diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py index 782e6b5068..2ebf74a4d7 100644 --- a/tensorflow/python/kernel_tests/cholesky_op_test.py +++ b/tensorflow/python/kernel_tests/cholesky_op_test.py @@ -36,6 +36,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables from tensorflow.python.ops.linalg import linalg +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test from tensorflow.python.platform import 
tf_logging @@ -327,7 +328,7 @@ class CholeskyBenchmark(test.Benchmark): def benchmarkCholeskyOp(self): for shape in self.shapes: with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix = variables.Variable(self._GenerateMatrix(shape)) l = linalg_ops.cholesky(matrix) @@ -341,7 +342,7 @@ class CholeskyBenchmark(test.Benchmark): if test.is_gpu_available(True): with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/device:GPU:0"): matrix = variables.Variable(self._GenerateMatrix(shape)) l = linalg_ops.cholesky(matrix) @@ -359,7 +360,7 @@ class CholeskyBenchmark(test.Benchmark): for shape in self.shapes: matrix = self._GenerateMatrix(shape) with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device(device): l = variables.Variable(np.linalg.cholesky(matrix)) grad_matrix = variables.Variable( diff --git a/tensorflow/python/kernel_tests/determinant_op_test.py b/tensorflow/python/kernel_tests/determinant_op_test.py index a52b2c0dc3..fb114f9f24 100644 --- a/tensorflow/python/kernel_tests/determinant_op_test.py +++ b/tensorflow/python/kernel_tests/determinant_op_test.py @@ -28,6 +28,7 @@ from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test @@ -185,8 +186,8 @@ class MatrixDeterminantBenchmark(test.Benchmark): def benchmarkMatrixDeterminantOp(self): for shape in self.shapes: - with ops.Graph().as_default(), session.Session() as sess, ops.device( - "/cpu:0"): + with ops.Graph().as_default(), session.Session( + config=benchmark.benchmark_config()) as sess, ops.device("/cpu:0"): matrix = 
self._GenerateMatrix(shape) d = linalg_ops.matrix_determinant(matrix) variables.global_variables_initializer().run() @@ -198,8 +199,8 @@ class MatrixDeterminantBenchmark(test.Benchmark): name="matrix_determinant_cpu_{shape}".format(shape=shape)) if test.is_gpu_available(True): - with ops.Graph().as_default(), session.Session() as sess, ops.device( - "/gpu:0"): + with ops.Graph().as_default(), session.Session( + config=benchmark.benchmark_config()) as sess, ops.device("/gpu:0"): matrix = self._GenerateMatrix(shape) d = linalg_ops.matrix_determinant(matrix) variables.global_variables_initializer().run() diff --git a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py index 68d626de2c..a0ef3a607e 100644 --- a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py @@ -27,6 +27,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import variables +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test as test_lib @@ -109,7 +110,7 @@ class MatrixBandPartBenchmark(test_lib.Benchmark): for shape_ in self.shapes: for limits in (-1, -1), (-1, 0), (0, -1), (2, 2): with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix = variables.Variable(array_ops.ones(shape_)) band = array_ops.matrix_band_part(matrix, limits[0], limits[1]) @@ -123,7 +124,7 @@ class MatrixBandPartBenchmark(test_lib.Benchmark): if test_lib.is_gpu_available(True): with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/gpu:0"): matrix = variables.Variable(array_ops.ones(shape_)) band = array_ops.matrix_band_part(matrix, 
limits[0], limits[1]) diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py index 0386e91276..9630c052b8 100644 --- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables from tensorflow.python.ops.linalg import linalg_impl +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test @@ -181,7 +182,7 @@ class MatrixExponentialBenchmark(test.Benchmark): def benchmarkMatrixExponentialOp(self): for shape in self.shapes: with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix = self._GenerateMatrix(shape) expm = linalg_impl.matrix_exponential(matrix) @@ -195,7 +196,7 @@ class MatrixExponentialBenchmark(test.Benchmark): if test.is_gpu_available(True): with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/gpu:0"): matrix = self._GenerateMatrix(shape) expm = linalg_impl.matrix_exponential(matrix) diff --git a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py index 720ba806e9..8bda04b53d 100644 --- a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py @@ -28,6 +28,7 @@ from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test @@ -179,7 +180,7 @@ class MatrixInverseBenchmark(test.Benchmark): for adjoint 
in False, True: for shape in self.shapes: with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix = self._GenerateMatrix(shape) inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint) @@ -193,7 +194,7 @@ class MatrixInverseBenchmark(test.Benchmark): if test.is_gpu_available(True): with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/gpu:0"): matrix = self._GenerateMatrix(shape) inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint) diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py index 723a15fbd1..3205e211d9 100644 --- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables from tensorflow.python.ops.linalg import linalg_impl +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test @@ -159,7 +160,7 @@ class MatrixLogarithmBenchmark(test.Benchmark): def benchmarkMatrixLogarithmOp(self): for shape in self.shapes: with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix = self._GenerateMatrix(shape) logm = gen_linalg_ops.matrix_logarithm(matrix) diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py index de495968a7..225a10e117 100644 --- a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import 
linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test as test_lib @@ -313,7 +314,7 @@ class MatrixSolveLsBenchmark(test_lib.Benchmark): for num_rhs in 1, 2, matrix_shape[-1]: with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix, rhs = _GenerateTestData(matrix_shape, num_rhs) x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer) @@ -328,7 +329,7 @@ class MatrixSolveLsBenchmark(test_lib.Benchmark): if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513): with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/gpu:0"): matrix, rhs = _GenerateTestData(matrix_shape, num_rhs) x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer) diff --git a/tensorflow/python/kernel_tests/matrix_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_op_test.py index b8f2736b7b..264df2565c 100644 --- a/tensorflow/python/kernel_tests/matrix_solve_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_solve_op_test.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test @@ -167,7 +168,7 @@ class MatrixSolveBenchmark(test.Benchmark): for num_rhs in 1, 2, matrix_shape[-1]: with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix, rhs = self._GenerateTestData(matrix_shape, num_rhs) x = linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint) @@ -185,7 +186,7 @@ class MatrixSolveBenchmark(test.Benchmark): 
if run_gpu_test: with ops.Graph().as_default(), \ - session.Session() as sess, \ + session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/gpu:0"): matrix, rhs = self._GenerateTestData(matrix_shape, num_rhs) x = linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint) diff --git a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py index 31e84341ae..fdfe1001b8 100644 --- a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.ops import array_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variables +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test # pylint: disable=protected-access @@ -192,7 +193,7 @@ class BenchmarkSparseTensorsMapVsSerialization(test.Benchmark): sorted(zip(indices_batch, indices_value)), dtype=np.int64) values = ["feature_value_for_embedding_lookup"] * num_elements shape = np.asarray([batch_size, num_elements], dtype=np.int64) - with session.Session() as sess: + with session.Session(config=benchmark.benchmark_config()) as sess: with ops.device("/cpu:0"): indices = variables.Variable(indices) values = variables.Variable(values) diff --git a/tensorflow/python/kernel_tests/where_op_test.py b/tensorflow/python/kernel_tests/where_op_test.py index 29fb002ef4..04ac589432 100644 --- a/tensorflow/python/kernel_tests/where_op_test.py +++ b/tensorflow/python/kernel_tests/where_op_test.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.platform import benchmark from tensorflow.python.platform import test @@ -160,7 +161,7 
@@ class WhereBenchmark(test.Benchmark): x = random_ops.random_uniform((m, n), dtype=dtypes.float32) <= p v = resource_variable_ops.ResourceVariable(x) op = array_ops.where(v) - with session.Session() as sess: + with session.Session(config=benchmark.benchmark_config()) as sess: v.initializer.run() r = self.run_op_benchmark(sess, op, min_iters=100, name=name) gb_processed_input = m * n / 1.0e9 @@ -186,7 +187,7 @@ class WhereBenchmark(test.Benchmark): y = resource_variable_ops.ResourceVariable(y_gen) c = resource_variable_ops.ResourceVariable(c_gen) op = array_ops.where(c, x, y) - with session.Session() as sess: + with session.Session(config=benchmark.benchmark_config()) as sess: x.initializer.run() y.initializer.run() c.initializer.run() diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 35fdee4fad..ff86df6346 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -602,20 +602,19 @@ class AdjustHueBenchmark(test.Benchmark): if cpu_count is not None: config.inter_op_parallelism_threads = 1 config.intra_op_parallelism_threads = cpu_count - with session.Session("", graph=ops.Graph(), config=config) as sess: - with ops.device(device): - inputs = variables.Variable( - random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255, - trainable=False, - dtype=dtypes.float32) - delta = constant_op.constant(0.1, dtype=dtypes.float32) - outputs = image_ops.adjust_hue(inputs, delta) - run_op = control_flow_ops.group(outputs) - sess.run(variables.global_variables_initializer()) - for i in xrange(warmup_rounds + benchmark_rounds): - if i == warmup_rounds: - start = time.time() - sess.run(run_op) + with self.benchmark_session(config=config, device=device) as sess: + inputs = variables.Variable( + random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255, + trainable=False, + dtype=dtypes.float32) + delta = constant_op.constant(0.1, dtype=dtypes.float32) + outputs = 
image_ops.adjust_hue(inputs, delta) + run_op = control_flow_ops.group(outputs) + sess.run(variables.global_variables_initializer()) + for i in xrange(warmup_rounds + benchmark_rounds): + if i == warmup_rounds: + start = time.time() + sess.run(run_op) end = time.time() step_time = (end - start) / benchmark_rounds tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all") @@ -646,21 +645,20 @@ class AdjustSaturationBenchmark(test.Benchmark): if cpu_count is not None: config.inter_op_parallelism_threads = 1 config.intra_op_parallelism_threads = cpu_count - with session.Session("", graph=ops.Graph(), config=config) as sess: - with ops.device(device): - inputs = variables.Variable( - random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255, - trainable=False, - dtype=dtypes.float32) - delta = constant_op.constant(0.1, dtype=dtypes.float32) - outputs = image_ops.adjust_saturation(inputs, delta) - run_op = control_flow_ops.group(outputs) - sess.run(variables.global_variables_initializer()) - for _ in xrange(warmup_rounds): - sess.run(run_op) - start = time.time() - for _ in xrange(benchmark_rounds): - sess.run(run_op) + with self.benchmark_session(config=config, device=device) as sess: + inputs = variables.Variable( + random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255, + trainable=False, + dtype=dtypes.float32) + delta = constant_op.constant(0.1, dtype=dtypes.float32) + outputs = image_ops.adjust_saturation(inputs, delta) + run_op = control_flow_ops.group(outputs) + sess.run(variables.global_variables_initializer()) + for _ in xrange(warmup_rounds): + sess.run(run_op) + start = time.time() + for _ in xrange(benchmark_rounds): + sess.run(run_op) end = time.time() step_time = (end - start) / benchmark_rounds tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all") @@ -699,7 +697,7 @@ class ResizeBilinearBenchmark(test.Benchmark): deps = [resize_op] benchmark_op = control_flow_ops.group(*deps) - with session.Session() as 
sess: + with self.benchmark_session() as sess: sess.run(variables.global_variables_initializer()) results = self.run_op_benchmark( sess, @@ -747,7 +745,7 @@ class ResizeBicubicBenchmark(test.Benchmark): deps = [resize_op] benchmark_op = control_flow_ops.group(*deps) - with session.Session() as sess: + with self.benchmark_session() as sess: sess.run(variables.global_variables_initializer()) results = self.run_op_benchmark( sess, @@ -804,7 +802,7 @@ class ResizeAreaBenchmark(test.Benchmark): deps = [resize_op] benchmark_op = control_flow_ops.group(*deps) - with session.Session() as sess: + with self.benchmark_session() as sess: sess.run(variables.global_variables_initializer()) results = self.run_op_benchmark( sess, diff --git a/tensorflow/python/platform/benchmark.py b/tensorflow/python/platform/benchmark.py index fa17b17d10..4f7abb311a 100644 --- a/tensorflow/python/platform/benchmark.py +++ b/tensorflow/python/platform/benchmark.py @@ -27,6 +27,7 @@ import time import six from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.core.util import test_log_pb2 from tensorflow.python.client import timeline from tensorflow.python.platform import app @@ -182,6 +183,19 @@ class Benchmark(six.with_metaclass(_BenchmarkRegistrar, object)): throughput=throughput, extras=extras) +@tf_export("test.benchmark_config") +def benchmark_config(): + """Returns a tf.ConfigProto for disabling the dependency optimizer. + + Returns: + A TensorFlow ConfigProto object. 
+ """ + config = config_pb2.ConfigProto() + config.graph_options.rewrite_options.dependency_optimization = ( + rewriter_config_pb2.RewriterConfig.OFF) + return config + + @tf_export("test.Benchmark") class TensorFlowBenchmark(Benchmark): """Abstract class that provides helpers for TensorFlow benchmarks.""" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt index abe9b068ae..984c584c9e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt @@ -20,6 +20,10 @@ tf_module { name: "assert_equal_graph_def" argspec: "args=[\'actual\', \'expected\', \'checkpoint_v2\'], varargs=None, keywords=None, defaults=[\'False\'], " } + member_method { + name: "benchmark_config" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } member_method { name: "compute_gradient" argspec: "args=[\'x\', \'x_shape\', \'y\', \'y_shape\', \'x_init_value\', \'delta\', \'init_targets\', \'extra_feed_dict\'], varargs=None, keywords=None, defaults=[\'None\', \'0.001\', \'None\', \'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt index abe9b068ae..984c584c9e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt @@ -20,6 +20,10 @@ tf_module { name: "assert_equal_graph_def" argspec: "args=[\'actual\', \'expected\', \'checkpoint_v2\'], varargs=None, keywords=None, defaults=[\'False\'], " } + member_method { + name: "benchmark_config" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } member_method { name: "compute_gradient" argspec: "args=[\'x\', \'x_shape\', \'y\', \'y_shape\', \'x_init_value\', \'delta\', \'init_targets\', \'extra_feed_dict\'], varargs=None, keywords=None, defaults=[\'None\', \'0.001\', \'None\', \'None\'], " -- GitLab From 494bbdfced3fd8596721d12e73676c4967f452e4 
Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Mon, 8 Oct 2018 13:48:19 -0700 Subject: [PATCH 077/411] Allow using more than one converter in the testing harness. PiperOrigin-RevId: 216242862 --- tensorflow/python/autograph/core/converter_testing.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py index dc2d419d34..fcdbd0a82c 100644 --- a/tensorflow/python/autograph/core/converter_testing.py +++ b/tensorflow/python/autograph/core/converter_testing.py @@ -128,7 +128,13 @@ class TestCase(test.TestCase): @contextlib.contextmanager def converted(self, entity, converter_module, namespace, *tf_symbols): node, ctx = self.prepare(entity, namespace) - node = converter_module.transform(node, ctx) + + if not isinstance(converter_module, (list, tuple)): + converter_module = (converter_module,) + for m in converter_module: + node = m.transform(node, ctx) + node = converter.standard_analysis(node, ctx, is_initial=True) + with self.compiled(node, namespace, *tf_symbols) as result: yield result -- GitLab From eec9ca8f0baccd249a49046fe31b460903e44850 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 8 Oct 2018 13:50:12 -0700 Subject: [PATCH 078/411] Partial support tfe.defun in tf.gradients. Doesn't attempt to deal with cases where we might have already generated the functiondef for the parent function as in that case we cannot easily modify the forward pass. 
PiperOrigin-RevId: 216243224 --- .../core/common_runtime/shape_refiner.cc | 5 ++ tensorflow/core/framework/shape_inference.cc | 9 ++ tensorflow/core/framework/shape_inference.h | 9 +- tensorflow/core/graph/graph.cc | 13 +++ tensorflow/core/graph/graph.h | 5 ++ tensorflow/core/graph/node_builder.cc | 8 +- tensorflow/core/ops/resource_variable_ops.cc | 3 +- tensorflow/python/eager/function.py | 87 ++++++++++--------- tensorflow/python/eager/function_test.py | 18 +++- tensorflow/python/framework/op_def_library.py | 3 +- .../python/kernel_tests/cond_v2_test.py | 1 + tensorflow/python/ops/custom_gradient.py | 44 ++++++++++ tensorflow/python/ops/gradients_impl.py | 30 +++---- tensorflow/python/ops/while_v2.py | 3 +- 14 files changed, 169 insertions(+), 69 deletions(-) diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index fa4d1eda62..9488a44778 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -288,6 +288,11 @@ Status ShapeRefiner::SetShape(const Node* node, int output_port, "output_port '", output_port, "' is out of range, ", "node '", node->name(), "' has ", node->num_outputs(), " outputs"); } + // Note: it's possible, if the node's been updated, that the shape inference + // context doesn't have the right number of outputs. + if (node->num_outputs() > c->num_outputs()) { + TF_RETURN_IF_ERROR(c->ExpandOutputs(node->num_outputs())); + } // Check compatibility, and merge the shapes. 
ShapeHandle existing_shape = c->output(output_port); diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 3e77028a5f..4dcc80680f 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -239,6 +239,15 @@ void InferenceContext::PreInputInit( output_handle_shapes_and_types_.resize(num_outputs); } +Status InferenceContext::ExpandOutputs(int new_output_size) { + if (new_output_size < outputs_.size()) { + return errors::InvalidArgument("Trying to reduce number of outputs of op."); + } + outputs_.resize(new_output_size, nullptr); + output_handle_shapes_and_types_.resize(new_output_size); + return Status::OK(); +} + void InferenceContext::PostInputInit( std::vector>> input_handle_data) { int num_inputs_from_node_def = 0; diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index 81258b55b3..e3885b7d9e 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -323,13 +323,13 @@ class InferenceContext { return input_tensors_as_shapes_; } - ShapeHandle output(int64 idx) const { return outputs_[idx]; } - void set_output(int idx, ShapeHandle shape) { outputs_[idx] = shape; } + ShapeHandle output(int64 idx) const { return outputs_.at(idx); } + void set_output(int idx, ShapeHandle shape) { outputs_.at(idx) = shape; } Status set_output(StringPiece output_name, const std::vector& shapes); int num_outputs() const { return outputs_.size(); } - ShapeHandle output(int idx) const { return outputs_[idx]; } + ShapeHandle output(int idx) const { return outputs_.at(idx); } Status output(StringPiece output_name, std::vector* output) const; @@ -645,6 +645,9 @@ class InferenceContext { return merged_dims_; } + // Adds new outputs; useful when mutating the graph. + Status ExpandOutputs(int new_output_size); + private: // Creates and stores shapes for use in InferenceContext. 
class ShapeManager { diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 7a4a0096fa..6f068546d2 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -142,6 +142,19 @@ void Node::Clear() { assigned_device_name_index_ = 0; } +void Node::UpdateProperties() { + DataTypeVector inputs; + DataTypeVector outputs; + Status status = + InOutTypesForNode(props_->node_def, *(props_->op_def), &inputs, &outputs); + if (!status.ok()) { + LOG(ERROR) << "Failed at updating node: " << status; + return; + } + props_ = std::make_shared(props_->op_def, props_->node_def, + inputs, outputs); +} + const string& Node::name() const { return props_->node_def.name(); } const string& Node::type_string() const { return props_->node_def.op(); } const NodeDef& Node::def() const { return props_->node_def; } diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 2944951f82..228b1331d9 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -171,6 +171,7 @@ class Node { template void AddAttr(const string& name, const T& val) { SetAttrValue(val, AddAttrHelper(name)); + UpdateProperties(); } void ClearAttr(const string& name); @@ -211,6 +212,10 @@ class Node { // e.g. in AddAttr. void MaybeCopyOnWrite(); + // Called after an attr has changed. Decides whether we need to update some + // property of the node (stored in props_). 
+ void UpdateProperties(); + AttrValue* AddAttrHelper(const string& name); // A set of mutually exclusive classes for different kinds of nodes, diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc index d92874909f..68a20fcc5f 100644 --- a/tensorflow/core/graph/node_builder.cc +++ b/tensorflow/core/graph/node_builder.cc @@ -140,10 +140,10 @@ void NodeBuilder::AddIndexError(const Node* node, int i) { strings::StrCat("Attempt to add nullptr Node to node with type ", def_builder_.op_def().name())); } else { - errors_.emplace_back( - strings::StrCat("Attempt to add output ", i, " of ", node->name(), - " not in range [0, ", node->num_outputs(), - ") to node with type ", def_builder_.op_def().name())); + errors_.emplace_back(strings::StrCat( + "Attempt to add output ", i, " of ", node->name(), " not in range [0, ", + node->num_outputs(), ") to node with type ", + def_builder_.op_def().name(), ". Node: ", node->DebugString())); } } diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc index adc9cd1486..65bdde375b 100644 --- a/tensorflow/core/ops/resource_variable_ops.cc +++ b/tensorflow/core/ops/resource_variable_ops.cc @@ -216,7 +216,8 @@ REGISTER_OP("VarIsInitializedOp") Status VariableShapeShapeFn(InferenceContext* c) { auto* handle_data = c->input_handle_shapes_and_types(0); if (handle_data == nullptr || handle_data->empty()) { - return errors::InvalidArgument("Handle doesn't have shape information."); + c->set_output(0, c->Vector(c->UnknownDim())); + return Status::OK(); } ShapeHandle var_shape = (*handle_data)[0].shape; int64 rank = c->RankKnown(var_shape) ? 
c->Rank(var_shape) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 93168826b1..99bf375ea7 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -46,6 +46,7 @@ from tensorflow.python.framework import tensor_spec from tensorflow.python.ops import array_ops from tensorflow.python.ops import cond_v2_impl from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import custom_gradient from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import resource_variable_ops @@ -81,49 +82,10 @@ def _create_substitute_placeholder(value, name=None, dtype=None): with ops.control_dependencies(None): placeholder = graph_placeholder( dtype=dtype or value.dtype, shape=value.shape, name=name) - _copy_handle_data(value, placeholder) + custom_gradient.copy_handle_data(value, placeholder) return placeholder -def _copy_handle_data(source_t, target_t): - """Copies HandleData for variant and resource type tensors if available. - - The CppShapeInferenceResult::HandleData proto contains information about the - shapes and types of the element tensors of resource/variant type tensors. - We need to copy this across function boundaries, i.e., when capturing a - placeholder or when returning a function tensor as output. If we don't do this - the element tensors will have unknown shapes, e.g., if a TensorList variant - tensor is captured as a placeholder, elements popped from that list would have - unknown shape. - - Args: - source_t: The tensor to copy HandleData from. - target_t: The tensor to copy HandleData to. 
- """ - if (target_t.dtype == dtypes_module.resource or - target_t.dtype == dtypes_module.variant): - if isinstance(source_t, ops.EagerTensor): - handle_data = source_t._handle_data # pylint: disable=protected-access - else: - handle_data = resource_variable_ops.get_resource_handle_data(source_t) - if handle_data is not None and handle_data.is_set: - # pylint: disable=protected-access - pywrap_tensorflow.SetHandleShapeAndType(target_t.graph._c_graph, - target_t._as_tf_output(), - handle_data.SerializeToString()) - # pylint: enable=protected-access - # Ensure that shapes and dtypes are propagated. - shapes, types = zip(*[(pair.shape, pair.dtype) - for pair in handle_data.shape_and_type]) - ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes] - shapes = [[d.size for d in s.dim] - if not s.unknown_rank else None for s in shapes] - pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper( - target_t._op._graph._c_graph, # pylint: disable=protected-access - target_t._as_tf_output(), # pylint: disable=protected-access - shapes, ranks, types) - - def _get_device_functions(ctx, graph): """Returns a tuple of device functions representing the device stack.""" if ctx.executing_eagerly(): @@ -547,7 +509,7 @@ class _EagerDefinedFunction(object): for i, shape in enumerate(self._output_shapes): outputs[i].set_shape(shape) for i, func_graph_output in enumerate(self._func_graph_outputs): - _copy_handle_data(func_graph_output, outputs[i]) + custom_gradient.copy_handle_data(func_graph_output, outputs[i]) return outputs @@ -658,7 +620,48 @@ class Function(object): if tape.should_record(tensor_inputs) or tape.should_record(captures): return self._backprop_call(args) - outputs = self._inference_function.call(ctx, args) + # Only need to override the gradient in graph mode and when we have outputs. 
+ if context.executing_eagerly() or not self.outputs: + outputs = self._inference_function.call(ctx, args) + else: + name = "PartitionedCall-%s" % ops.uid() + + @ops.RegisterGradient(name) + def grad_fn(op, *doutputs): # pylint: disable=unused-variable + """Gradients of this function.""" + if op.graph is not ops.get_default_graph(): + # TODO(apassos) this will still emit SymbolicGradient ops when + # nested defuns are being differentiated. We need to somehow figure + # out a way to update the FunctionDef corresponding to the calling + # function when mutating a call to the forward pass. + return gradients_impl._SymGrad(op, list(doutputs)) # pylint: disable=protected-access + if self._backward_graph_function is None: + self._construct_backprop_function() + self._forward_function.add_to_graph(op.graph) + func = attr_value_pb2.AttrValue( + func=attr_value_pb2.NameAttrList( + name=self._forward_function.name)) + # pylint: disable=protected-access + op._set_attr("f", func) + types = attr_value_pb2.AttrValue.ListValue( + type=self._forward_function._output_types) + op._set_attr("Tout", attr_value_pb2.AttrValue(list=types)) + for i in range( + len(outputs), len(self._forward_function._output_types)): + t = ops.Tensor(op, i, self._forward_function._output_types[i]) + t.set_shape(self._forward_function._output_shapes[i]) + func_graph_output = self._forward_function._func_graph_outputs[i] + custom_gradient.copy_handle_data(func_graph_output, t) + op._outputs.append(t) + # pylint: enable=protected-access + side_outputs = op.outputs[len(outputs):] + return self._backward_graph_function( + *(list(doutputs) + list(side_outputs))) + + with ops.get_default_graph().gradient_override_map( + {"PartitionedCall": name}): + outputs = self._inference_function.call(ctx, args) + return self._build_call_outputs(outputs) @property diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 57e545be69..e46bde098b 100644 --- 
a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -286,7 +286,23 @@ class FunctionTest(test.TestCase): c = constant_op.constant([[2.]]) f_c = f(c) g, = gradients_impl.gradients(f_c, c) - self.assertAllEqual(sess.run(g), [[1.0]]) + self.assertAllEqual(sess.run(g).values, [[1.0]]) + + def testNoSymGradNestedDefun(self): + + @function.defun + def outer(): + + @function.defun + def f(x): + return array_ops.gather_nd(x, [[0]]) + + c = constant_op.constant([[2.]]) + f_c = f(c) + g, = gradients_impl.gradients(f_c, c) + self.assertTrue(isinstance(g, ops.IndexedSlices)) + + outer() def testNestedInputsGraphFunction(self): matmul = function.defun(math_ops.matmul) diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py index e85bba11cd..9955a9a2cd 100644 --- a/tensorflow/python/framework/op_def_library.py +++ b/tensorflow/python/framework/op_def_library.py @@ -482,7 +482,8 @@ class OpDefLibrary(object): else: raise TypeError("%s that don't all match." % prefix) else: - raise TypeError("%s that are invalid." % prefix) + raise TypeError( + "%s that are invalid. 
Tensors: %s" % (prefix, values)) types = [x.dtype for x in values] inputs.extend(values) diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py index ec875aae59..a424a0f219 100644 --- a/tensorflow/python/kernel_tests/cond_v2_test.py +++ b/tensorflow/python/kernel_tests/cond_v2_test.py @@ -153,6 +153,7 @@ class CondV2Test(test.TestCase): self.assertIn("foo_cond_1_false", ops.get_default_graph()._functions) def testDefunInCond(self): + self.skipTest("b/117293122") x = constant_op.constant(1.0, name="x") y = constant_op.constant(2.0, name="y") diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index d7834ba350..bfe23834b7 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -18,9 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import tape as tape_lib +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_array_ops @@ -33,6 +35,45 @@ from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export +def copy_handle_data(source_t, target_t): + """Copies HandleData for variant and resource type tensors if available. + + The CppShapeInferenceResult::HandleData proto contains information about the + shapes and types of the element tensors of resource/variant type tensors. + We need to copy this across function boundaries, i.e., when capturing a + placeholder or when returning a function tensor as output. 
If we don't do this + the element tensors will have unknown shapes, e.g., if a TensorList variant + tensor is captured as a placeholder, elements popped from that list would have + unknown shape. + + Args: + source_t: The tensor to copy HandleData from. + target_t: The tensor to copy HandleData to. + """ + if (target_t.dtype == dtypes.resource or + target_t.dtype == dtypes.variant): + if isinstance(source_t, ops.EagerTensor): + handle_data = source_t._handle_data # pylint: disable=protected-access + else: + handle_data = resource_variable_ops.get_resource_handle_data(source_t) + if handle_data is not None and handle_data.is_set: + # pylint: disable=protected-access + pywrap_tensorflow.SetHandleShapeAndType(target_t.graph._c_graph, + target_t._as_tf_output(), + handle_data.SerializeToString()) + # pylint: enable=protected-access + # Ensure that shapes and dtypes are propagated. + shapes, types = zip(*[(pair.shape, pair.dtype) + for pair in handle_data.shape_and_type]) + ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes] + shapes = [[d.size for d in s.dim] + if not s.unknown_rank else None for s in shapes] + pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper( + target_t._op._graph._c_graph, # pylint: disable=protected-access + target_t._as_tf_output(), # pylint: disable=protected-access + shapes, ranks, types) + + @tf_export("custom_gradient") def custom_gradient(f): """Decorator to define a function with a custom gradient. 
@@ -180,8 +221,11 @@ def _graph_mode_decorator(f, *args, **kwargs): input_grads = nest.flatten(input_grads) return ([None] * len(flat_result)) + input_grads + variable_grads + original_tensors = all_tensors with ops.get_default_graph().gradient_override_map({"IdentityN": name}): all_tensors = array_ops.identity_n(all_tensors) + for ot, t in zip(original_tensors, all_tensors): + copy_handle_data(ot, t) return nest.pack_sequence_as( structure=result, flat_sequence=all_tensors[:len(flat_result)]) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index aac95037dc..6909fcaed5 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -800,23 +800,21 @@ def _GradientsHelper(ys, # pylint: enable=protected-access has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads) if has_out_grads and (op not in stop_ops): - if is_func_call: - if is_partitioned_call: - func_call = src_graph._get_function( # pylint: disable=protected-access - compat.as_bytes(op.get_attr("f").name)) + try: + grad_fn = ops.get_gradient_function(op) + except LookupError: + if is_func_call: + if is_partitioned_call: + func_call = src_graph._get_function( # pylint: disable=protected-access + compat.as_bytes(op.get_attr("f").name)) + else: + func_call = src_graph._get_function(op.type) # pylint: disable=protected-access + # Note that __defun is not set if the graph is + # imported. If it's set, we prefer to access the original + # defun. + func_call = getattr(op, "__defun", func_call) + grad_fn = func_call.python_grad_func else: - func_call = src_graph._get_function(op.type) # pylint: disable=protected-access - # Note that __defun is not set if the graph is - # imported. If it's set, we prefer to access the original - # defun. 
- func_call = getattr(op, "__defun", func_call) - grad_fn = func_call.python_grad_func - else: - # A grad_fn must be defined, either as a function or as None - # for ops that do not have gradients. - try: - grad_fn = ops.get_gradient_function(op) - except LookupError: raise LookupError( "No gradient defined for operation '%s' (op type: %s)" % (op.name, op.type)) diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py index 8e88a84d60..0419656143 100644 --- a/tensorflow/python/ops/while_v2.py +++ b/tensorflow/python/ops/while_v2.py @@ -37,6 +37,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import cond_v2_impl as cond_v2 from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import control_flow_util +from tensorflow.python.ops import custom_gradient from tensorflow.python.ops import gen_functional_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import list_ops @@ -580,7 +581,7 @@ def _check_shapes_compat(output_tensors, shape_invariants, input_tensors): def _copy_handle_data(src_tensors, tgt_tensors): for src_t, tgt_t in zip(src_tensors, tgt_tensors): - function._copy_handle_data(src_t, tgt_t) + custom_gradient.copy_handle_data(src_t, tgt_t) # TODO(srbs): Move to common utils for cond_v2 and while_v2. 
-- GitLab From 13b47e6c4f9d7b295948b1057139bf676e394b6f Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 8 Oct 2018 14:16:55 -0700 Subject: [PATCH 079/411] Automated rollback of commit 295b3c80555cc82d8d70faf96a47681e1d904b9c PiperOrigin-RevId: 216247929 --- tensorflow/core/kernels/data/iterator_ops.cc | 4 --- .../kernels/data/map_and_batch_dataset_op.cc | 9 ++++--- .../core/kernels/data/model_dataset_op.cc | 10 ++++--- .../data/parallel_interleave_dataset_op.cc | 27 +++++++++++-------- .../kernels/data/parallel_map_iterator.cc | 9 ++++--- .../core/kernels/data/prefetch_dataset_op.cc | 10 ++++--- tensorflow/core/kernels/data/writer_ops.cc | 12 ++++----- 7 files changed, 44 insertions(+), 37 deletions(-) diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index 7a833668ac..8acd6cc724 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -16,10 +16,8 @@ limitations under the License. #include "tensorflow/core/common_runtime/graph_runner.h" #include "tensorflow/core/common_runtime/renamed_device.h" -#include "tensorflow/core/common_runtime/threadpool_device.h" #include "tensorflow/core/framework/iterator.pb.h" #include "tensorflow/core/framework/partial_tensor_shape.h" -#include "tensorflow/core/framework/resource_op_kernel.h" #include "tensorflow/core/framework/stats_aggregator.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/variant_op_registry.h" @@ -27,13 +25,11 @@ limitations under the License. 
#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/optional_ops.h" #include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" -#include "tensorflow/core/public/session_options.h" namespace tensorflow { namespace data { diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index f45a239793..0fb721cd7c 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -445,9 +445,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { auto ctx_copy = std::make_shared(*ctx); - runner_thread_.reset(ctx->env()->StartThread( - {}, "runner_thread", - std::bind(&Iterator::RunnerThread, this, ctx_copy))); + runner_thread_ = + MakeUnique(ctx->env(), "runner_thread"); + runner_thread_->Schedule( + std::bind(&Iterator::RunnerThread, this, ctx_copy)); } } @@ -703,7 +704,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr input_impl_; // Buffer for storing the (intermediate) batch results. std::deque> batch_results_ GUARDED_BY(*mu_); - std::unique_ptr runner_thread_ GUARDED_BY(*mu_); + std::unique_ptr runner_thread_ GUARDED_BY(*mu_); bool cancelled_ GUARDED_BY(*mu_) = false; }; diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc index 9aa505f4f1..859df57962 100644 --- a/tensorflow/core/kernels/data/model_dataset_op.cc +++ b/tensorflow/core/kernels/data/model_dataset_op.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/data/dataset.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -126,9 +127,10 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (!optimize_thread_) { std::shared_ptr new_ctx(new IteratorContext(*ctx)); - optimize_thread_.reset(ctx->env()->StartThread( - {}, "optimize_thread", - [this, new_ctx]() { OptimizeThread(new_ctx); })); + optimize_thread_ = + MakeUnique(ctx->env(), "optimize_thread"); + optimize_thread_->Schedule( + [this, new_ctx]() { OptimizeThread(new_ctx); }); } return Status::OK(); } @@ -167,7 +169,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { mutex mu_; condition_variable cond_var_; std::shared_ptr model_; - std::unique_ptr optimize_thread_ GUARDED_BY(mu_); + std::unique_ptr optimize_thread_ GUARDED_BY(mu_); bool cancelled_ GUARDED_BY(mu_) = false; std::unique_ptr input_impl_ GUARDED_BY(mu_); }; diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc index 6b6b3d6ab9..9c836b836e 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -481,9 +482,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { worker_threads_.reserve(dataset()->num_threads()); for (size_t i = 0; i < dataset()->num_threads(); ++i) { std::shared_ptr new_ctx(new IteratorContext(*ctx)); - worker_threads_.emplace_back(ctx->env()->StartThread( - {}, "worker_thread", - [this, new_ctx, i]() { WorkerThread(new_ctx, i); })); + worker_threads_.emplace_back( + MakeUnique(ctx->env(), "worker_thread")); + worker_threads_.back()->Schedule( + [this, new_ctx, i]() { WorkerThread(new_ctx, i); }); } } return Status::OK(); @@ -580,9 +582,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { } workers_[i].SetInputs(s, std::move(args)); std::shared_ptr new_ctx(new IteratorContext(*ctx)); - worker_threads_.emplace_back(ctx->env()->StartThread( - {}, "worker_thread", - [this, new_ctx, i]() { WorkerThread(new_ctx, i); })); + worker_threads_.emplace_back( + MakeUnique(ctx->env(), "worker_thread")); + worker_threads_.back()->Schedule( + [this, new_ctx, i]() { WorkerThread(new_ctx, i); }); if (i < dataset()->cycle_length_) { interleave_indices_.push_back(i); } else { @@ -1047,7 +1050,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { // The worker threads. This must be last to ensure the // threads have exited before any other members are deallocated. // TODO(b/65178177): Avoid allocating additional threads. 
- std::vector> worker_threads_ GUARDED_BY(mu_); + std::vector> worker_threads_ + GUARDED_BY(mu_); }; const DatasetBase* const input_; @@ -1389,9 +1393,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { std::shared_ptr new_ctx(new IteratorContext(*ctx)); - runner_thread_.reset(ctx->env()->StartThread( - {}, "runner_thread", - [this, new_ctx]() { RunnerThread(new_ctx); })); + runner_thread_ = + MakeUnique(ctx->env(), "runner_thread"); + runner_thread_->Schedule( + [this, new_ctx]() { RunnerThread(new_ctx); }); } } @@ -1645,7 +1650,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { int64 num_calls_ GUARDED_BY(*mu_) = 0; std::unique_ptr thread_pool_; - std::unique_ptr runner_thread_ GUARDED_BY(*mu_); + std::unique_ptr runner_thread_ GUARDED_BY(*mu_); // Identifies whether background activity should be cancelled. bool cancelled_ GUARDED_BY(*mu_) = false; diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc index ebf41925c9..e69274e4f2 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.cc +++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc @@ -181,9 +181,10 @@ class ParallelMapIterator : public DatasetBaseIterator { EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { auto ctx_copy = std::make_shared(*ctx); - runner_thread_.reset(ctx->env()->StartThread( - {}, "runner_thread", - std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy))); + runner_thread_ = + MakeUnique(ctx->env(), "runner_thread"); + runner_thread_->Schedule( + std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)); } } @@ -331,7 +332,7 @@ class ParallelMapIterator : public DatasetBaseIterator { // Buffer for storing the invocation results. 
std::deque> invocation_results_ GUARDED_BY(*mu_); - std::unique_ptr runner_thread_ GUARDED_BY(*mu_); + std::unique_ptr runner_thread_ GUARDED_BY(*mu_); bool cancelled_ GUARDED_BY(*mu_) = false; }; diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc index 754ed772db..e9c38eb8a0 100644 --- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc +++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -256,10 +257,11 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { Status EnsurePrefetchThreadStarted(IteratorContext* ctx) EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (!prefetch_thread_) { + prefetch_thread_ = + MakeUnique(ctx->env(), "prefetch_thread"); std::shared_ptr new_ctx(new IteratorContext(*ctx)); - prefetch_thread_.reset(ctx->env()->StartThread( - {}, "prefetch_thread", - [this, new_ctx]() { PrefetchThread(new_ctx); })); + prefetch_thread_->Schedule( + [this, new_ctx]() { PrefetchThread(new_ctx); }); } return Status::OK(); } @@ -363,7 +365,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { string prefix_end_; PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_); std::deque buffer_ GUARDED_BY(mu_); - std::unique_ptr prefetch_thread_ GUARDED_BY(mu_); + std::unique_ptr prefetch_thread_ GUARDED_BY(mu_); bool cancelled_ GUARDED_BY(mu_) = false; bool prefetch_thread_finished_ GUARDED_BY(mu_) = false; }; diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc index 3f76695bb1..7bb2077b62 100644 --- a/tensorflow/core/kernels/data/writer_ops.cc +++ b/tensorflow/core/kernels/data/writer_ops.cc @@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel { public: explicit 
ToTFRecordOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx), - thread_pool_(new thread::ThreadPool( - ctx->env(), ThreadOptions(), - strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())), - 1 /* num_threads */, false /* low_latency_hint */)) {} + background_worker_( + ctx->env(), + strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) { + } template Status ParseScalarArgument(OpKernelContext* ctx, @@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel { // The call to `iterator->GetNext()` may block and depend on an // inter-op thread pool thread, so we issue the call from the // owned thread pool. - thread_pool_->Schedule([this, ctx, done]() { + background_worker_.Schedule([this, ctx, done]() { string filename; OP_REQUIRES_OK_ASYNC( ctx, ParseScalarArgument(ctx, "filename", &filename), done); @@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel { } private: - std::unique_ptr thread_pool_; + BackgroundWorker background_worker_; }; REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU), -- GitLab From 09b0fc199129e0f487a39741bdf674cf09035cbc Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 8 Oct 2018 14:17:24 -0700 Subject: [PATCH 080/411] [tf.data] Choose non-deterministic seed once per Python-level `Dataset` object. This changes the behavior of randomness-introducing datasets (`tf.data.Dataset.shuffle()`, `tf.data.experimental.shuffle_and_repeat()`, and `tf.data.experimental.RandomDataset`). Previously, when you used the same `tf.data.Dataset` object multiple times in a pipeline (e.g. by zipping two datasets derived from the same randomness-introducing dataset) *and* you did not specify an explicit `seed`, the implementation would choose different non-deterministic seeds for each use of the `Dataset` object. 
With this change, the seed will be chosen once per `Dataset` (technically, once per `Dataset`-`Graph` combination, due to the vagaries of capturing state in `Dataset.make_one_shot_iterator()`), which means that all uses of the same dataset object will observe the same sequence of values. This change also revealed a small bug in how `Dataset.shuffle(..., reshuffle_each_iteration=False)` is serialized when an explicit seed is specified. The op-level seed was dropped, which could lead to non-deterministic behavior. This change fixes that issue by forwarding the op-level seed to the appropriate place. PiperOrigin-RevId: 216248013 --- .../core/kernels/data/shuffle_dataset_op.cc | 2 +- .../data/experimental/kernel_tests/BUILD | 13 ++++++ .../kernel_tests/random_dataset_test.py | 45 +++++++++++++++++++ .../kernel_tests/shuffle_and_repeat_test.py | 21 ++++++++- .../data/experimental/ops/random_ops.py | 21 +++++++-- .../data/experimental/ops/shuffle_ops.py | 21 +++++++-- tensorflow/python/data/kernel_tests/BUILD | 1 + .../kernel_tests/shuffle_dataset_op_test.py | 25 ++++++++++- tensorflow/python/data/ops/dataset_ops.py | 22 +++++++-- tensorflow/python/data/util/BUILD | 1 + tensorflow/python/data/util/random_seed.py | 5 ++- .../python/data/util/random_seed_test.py | 13 +++++- 12 files changed, 174 insertions(+), 16 deletions(-) create mode 100644 tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 66466d6a36..9f54c381a9 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase { int64 buffer_size, int64 seed, int64 seed2, int64 count) : ShuffleDatasetBase(ctx, input, buffer_size, count), seed_(seed), - seed2_(seed) {} + seed2_(seed2) {} string DebugString() const override { return 
strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_, diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD index 4eef9580ad..a67f6ff031 100644 --- a/tensorflow/python/data/experimental/kernel_tests/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/BUILD @@ -453,6 +453,18 @@ cuda_py_test( tags = ["no_windows_gpu"], ) +py_test( + name = "random_dataset_test", + srcs = ["random_dataset_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python/data/experimental/ops:random_ops", + "//tensorflow/python/data/kernel_tests:test_base", + "//tensorflow/python/data/ops:dataset_ops", + "@absl_py//absl/testing:parameterized", + ], +) + py_library( name = "reader_dataset_ops_test_base", testonly = 1, @@ -562,6 +574,7 @@ py_test( "//tensorflow/python/data/kernel_tests:test_base", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py new file mode 100644 index 0000000000..d403a575ec --- /dev/null +++ b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py @@ -0,0 +1,45 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for `tf.data.experimental.RandomDataset()`.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.python.data.experimental.ops import random_ops +from tensorflow.python.data.kernel_tests import test_base +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors + + +class RandomDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): + + @parameterized.named_parameters( + ("NoSeed", None), + ("WithSeed", 42), + ) + def testZipRandomDataset(self, seed): + dataset = random_ops.RandomDataset(seed=seed).take(30) + dataset = dataset_ops.Dataset.zip((dataset, dataset)) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.cached_session() as sess: + for _ in range(30): + x, y = sess.run(next_element) + self.assertEqual(x, y) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py index c208963a86..883169495f 100644 --- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from absl.testing import parameterized import numpy as np from tensorflow.python.data.experimental.ops import shuffle_ops @@ -27,7 +28,7 @@ from tensorflow.python.framework import ops from tensorflow.python.platform import test -class ShuffleAndRepeatTest(test_base.DatasetTestBase): +class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase): def _build_ds(self, seed, 
count=5, num_elements=20): return dataset_ops.Dataset.range(num_elements).apply( @@ -110,6 +111,24 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase): with self.session(graph=g) as sess: sess.run(get_next_op) + @parameterized.named_parameters( + ("NoSeed", None), + ("WithSeed", 42), + ) + def testShuffleAndRepeatAndZipDataset(self, seed): + dataset = dataset_ops.Dataset.range(10).apply( + shuffle_ops.shuffle_and_repeat(10, count=3, seed=seed)) + dataset = dataset_ops.Dataset.zip((dataset, dataset)) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.cached_session() as sess: + for _ in range(30): + x, y = sess.run(next_element) + self.assertEqual(x, y) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py index e3a2aeab31..25d7fbf691 100644 --- a/tensorflow/python/data/experimental/ops/random_ops.py +++ b/tensorflow/python/data/experimental/ops/random_ops.py @@ -33,13 +33,26 @@ class RandomDataset(dataset_ops.DatasetSource): def __init__(self, seed=None): """A `Dataset` of pseudorandom values.""" super(RandomDataset, self).__init__() - self._seed, self._seed2 = random_seed.get_seed(seed) + + # NOTE(mrry): We generate the seed-pair once per graph in which the dataset + # is iterated over, and cache it in `self._graph_seed_map`. This supports + # two features: iterating over the same `ShuffleDataset` twice in the same + # pipeline and observing the same order (by tying the seeds together with + # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`, + # which requires the stateful RNG op to be created inside the same graph as + # the dataset. 
+ self._original_seed = seed + self._graph_seed_map = {} def _as_variant_tensor(self): + try: + seed, seed2 = self._graph_seed_map[ops.get_default_graph()] + except KeyError: + seed, seed2 = random_seed.get_seed(self._original_seed) + self._graph_seed_map[ops.get_default_graph()] = (seed, seed2) + return gen_dataset_ops.random_dataset( - seed=self._seed, - seed2=self._seed2, - **dataset_ops.flat_structure(self)) + seed=seed, seed2=seed2, **dataset_ops.flat_structure(self)) @property def output_classes(self): diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py index a4307212da..a82e4b7d09 100644 --- a/tensorflow/python/data/experimental/ops/shuffle_ops.py +++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py @@ -39,17 +39,32 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset): else: self._count = ops.convert_to_tensor( count, dtype=dtypes.int64, name="count") - self._seed, self._seed2 = random_seed.get_seed(seed) + + # NOTE(mrry): We generate the seed-pair once per graph in which the dataset + # is iterated over, and cache it in `self._graph_seed_map`. This supports + # two features: iterating over the same `ShuffleDataset` twice in the same + # pipeline and observing the same order (by tying the seeds together with + # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`, + # which requires the stateful RNG op to be created inside the same graph as + # the dataset. 
+ self._original_seed = seed + self._graph_seed_map = {} def _as_variant_tensor(self): + try: + seed, seed2 = self._graph_seed_map[ops.get_default_graph()] + except KeyError: + seed, seed2 = random_seed.get_seed(self._original_seed) + self._graph_seed_map[ops.get_default_graph()] = (seed, seed2) + # pylint: disable=protected-access input_resource = self._input_dataset._as_variant_tensor() return gen_dataset_ops.shuffle_and_repeat_dataset( input_resource, buffer_size=self._buffer_size, count=self._count, - seed=self._seed, - seed2=self._seed2, + seed=seed, + seed2=seed2, **dataset_ops.flat_structure(self)) # pylint: enable=protected-access diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index c7295d6e69..ecb24103b3 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -443,6 +443,7 @@ tf_py_test( srcs = ["shuffle_dataset_op_test.py"], additional_deps = [ ":test_base", + "@absl_py//absl/testing:parameterized", "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py index 347af18576..6001721726 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import collections +from absl.testing import parameterized import numpy as np from tensorflow.python.data.kernel_tests import test_base @@ -31,7 +32,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.platform import test -class ShuffleDatasetTest(test_base.DatasetTestBase): +class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): def testShuffleDataset(self): components = ( @@ -209,5 +210,27 @@ class ShuffleDatasetTest(test_base.DatasetTestBase): with 
self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + @parameterized.named_parameters( + ("ReshuffleEachIterationNoSeed", None, True), + ("ReshuffleEachIterationWithSeed", 42, True), + ("NoReshuffleEachIterationNoSeed", None, False), + ("NoReshuffleEachIterationWithSeed", 42, False), + ) + def testShuffleAndZipDataset(self, seed, reshuffle): + dataset = (dataset_ops.Dataset.range(10) + .shuffle(10, seed=seed, reshuffle_each_iteration=reshuffle) + .repeat(3)) + dataset = dataset_ops.Dataset.zip((dataset, dataset)) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.cached_session() as sess: + for _ in range(30): + x, y = sess.run(next_element) + self.assertEqual(x, y) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index b7e19055f2..2d036fd0d6 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -2254,18 +2254,34 @@ class ShuffleDataset(UnaryDataset): self._input_dataset = input_dataset self._buffer_size = ops.convert_to_tensor( buffer_size, dtype=dtypes.int64, name="buffer_size") - self._seed, self._seed2 = random_seed.get_seed(seed) + + # NOTE(mrry): We generate the seed-pair once per graph in which the dataset + # is iterated over, and cache it in `self._graph_seed_map`. This supports + # two features: iterating over the same `ShuffleDataset` twice in the same + # pipeline and observing the same order (by tying the seeds together with + # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`, + # which requires the stateful RNG op to be created inside the same graph as + # the dataset. 
+ self._original_seed = seed + self._graph_seed_map = {} + if reshuffle_each_iteration is None: self._reshuffle_each_iteration = True else: self._reshuffle_each_iteration = reshuffle_each_iteration def _as_variant_tensor(self): + try: + seed, seed2 = self._graph_seed_map[ops.get_default_graph()] + except KeyError: + seed, seed2 = random_seed.get_seed(self._original_seed) + self._graph_seed_map[ops.get_default_graph()] = (seed, seed2) + return gen_dataset_ops.shuffle_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access buffer_size=self._buffer_size, - seed=self._seed, - seed2=self._seed2, + seed=seed, + seed2=seed2, reshuffle_each_iteration=self._reshuffle_each_iteration, **flat_structure(self)) diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index 39082ce370..95bf3209d7 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -142,6 +142,7 @@ py_test( ":random_seed", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:random_ops", "//tensorflow/python:util", ], ) diff --git a/tensorflow/python/data/util/random_seed.py b/tensorflow/python/data/util/random_seed.py index d5169f7a53..d24df6d957 100644 --- a/tensorflow/python/data/util/random_seed.py +++ b/tensorflow/python/data/util/random_seed.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops def get_seed(seed): @@ -37,7 +38,7 @@ def get_seed(seed): Returns: A tuple of two `tf.int64` scalar tensors that should be used for the local - seed of the calling dataset. + seeds of the calling dataset. 
""" seed, seed2 = random_seed.get_seed(seed) if seed is None: @@ -45,7 +46,7 @@ def get_seed(seed): else: seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") if seed2 is None: - seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") + seed2 = random_ops.random_uniform([], 1, 2**63 - 1, dtype=dtypes.int64) else: with ops.name_scope("seed2") as scope: seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64) diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py index a809151e6e..5df2e38c62 100644 --- a/tensorflow/python/data/util/random_seed_test.py +++ b/tensorflow/python/data/util/random_seed_test.py @@ -41,7 +41,6 @@ class RandomSeedTest(test.TestCase): # (input_graph_seed, input_op_seed) # and output from get_seed: # (output_graph_seed, output_op_seed) - ((None, None), (0, 0)), ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)), ((1, 1), (1, 1)), ((0, 0), (0, 2**31 - 1)), # Avoid nondeterministic (0, 0) output @@ -78,6 +77,18 @@ class RandomSeedTest(test.TestCase): self.assertEqual((g_seed, op_seed), toutput, msg=msg) random_seed.set_random_seed(None) + @test_util.run_in_graph_and_eager_modes + def testNondeterministicRandomSeed(self): + random_seed.set_random_seed(None) + op_seeds = [] + for _ in range(50): + g_seed, op_seed = data_random_seed.get_seed(None) + g_seed = self.evaluate(g_seed) + op_seed = self.evaluate(op_seed) + self.assertEqual(0, g_seed) + self.assertNotEqual(0, op_seed) + op_seeds.append(op_seed) + self.assertGreater(len(set(op_seeds)), 1) if __name__ == '__main__': test.main() -- GitLab From bc5635dc3ac78007caee88fabd81d23ad945b637 Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Mon, 8 Oct 2018 14:19:49 -0700 Subject: [PATCH 081/411] Update performance documentation. 
PiperOrigin-RevId: 216248418 --- .../performance/model_size_vs_accuracy.png | Bin 0 -> 18946 bytes .../performance/model_size_vs_latency.png | Bin 0 -> 21380 bytes tensorflow/contrib/lite/g3doc/performance.md | 21 ++++++++++++------ 3 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png create mode 100644 tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..44d0ccd3128dea1c947e57ccbc4e18b2d34cef88 GIT binary patch literal 18946 zcmeAS@N?(olHy`uVBq!ia0y~yU`l6TV0gyC#=yWZ;h>T$0|Ns~v6E*A2L}g74M$1` z0|NtRfk$L90|U1Z2s2)~Tla^7fx)uGHKHUqKdq!Zu_%=xH?gE3C%+^oGfAN=wWv5V zKTp9(&q&W$MR2{3l6>mg7dz=KI;8FuKNAgIqk|e>$ZNkl5!0cX%TSZP;7BD z(0a)LW*(Wp#6lP(WS|fqZ}t3*$;XZI@9lYR70#jPrL1|bMT7O#m6gpiXG#_p7Vi9J zo_&p{*UdR3B;+K^xBqM7_RebKl@8jNI^ z6_#GN!|C&D!!9l4P&~x}GEw!_wYA<34h_3%|NW_yv#*P35=^x&c_DClxxc!CLW6t1 zT;a=0s%(5R5tkB0Z|y35y&`6((d+B$^}p@oHqO#)(WqTwA-rgcgjr6+I;Nz%yGnWF z?QE_aa&mI=$lJ})3SIT!!Gi;TettfA>J*cLvGHWj$!dw&+1`7rzW#W*eEz37)>&6H z-n@CUVf*&#g z&FTI+y1G~Om6etM{Qb+gqvog4TMNdyzSXyqZ{G7J;FDonS!)MQ?W#6Bq>iy#C zYH^0$Wp5W1K0X$*CPJ{fx>`&>ZchH2lP6E++}zZY1-A8+5rC)+i3s_4Ie|K`|KPMW40os^v1Ja?|Fu$qs8uCA}`|3^o=v*JMU)5OZX zqx!p^s8)!Aon4&myYKhwmwQdsTC#lk^WRmi0&R|6#)@8?{{H?#!ooXCUJA{eIg^Er z&8XzXg_z@ezTc}>QBf&)e2kY**6PTwudgF_m+3OB4qKb_{M_7@wzi^QUoyAl+?@3K zPW`{n^JmPMAs{H|n317T{r%n3-S78l8>|!4joOfYUM}tItfLnfyH|dC;<@kmyBiyu zmn>14G-=Y0hwbt!;`Um}T9>IMPrd1{udR)Uin_GFzJ6EP z+f%zrUtifa+dTi%uh;88->ZIK`2Sz+p+ko*yewhxnPt*xoPO@Z?fm^8kIUCTc>TJ2 zV&KENJ8bOipx~9Wt2r@2(b>SvEbZhZ)gwob7QVS*$R}fQ!2Es<^MS?g{V%Sp6u!Hw zR9Q`}t)qj(s_c!&*H>3Re)@DO^g!N?4ULY?Y>8P}Q7bH31XMi)o%Vc^k&&5F`}>>b z=Vxbwm-)?I_2%B*=$BVkPM$Px-Z}=al$0ftCQe*9W7aG!as9ZE^U)iVS}*OZt#%C* 
zIePncbmYF8owv57pO<3|xODmQ)ZOLpwbIhkPCY)}fAyR1Y%|TcxVWxG8mDe;&7OK~ zZS-^L`|DzNPm!~&(ztf*T32gp>!zV z`t-xY!(EFsyb=-?Tw3NkJLY(gq_Nvf+v;ui{gxIiinvB@NND`@Y_o>}Ga;qm&Z@3{~IX`DSIUvbtOS`&;g->+9z~-?L+fMNn|?*KMN5;`g&N zgs+cV>fSFGvOZ2$TU&dX|NM1!KPJZWSbl74Zx@!7Jh`v-_l9lTjvYSycxRH3lZZa3 zTy>R^ky#PFT@O_3aZ}eW_JE#w{Ooj zOg{GH)&APw-?HxW%iF1FYIZ(7J^l6D<;Bm>y$VToaA>ev%K6~kJHLVggYWO|K0a%H zKjlCJW7YbNiHF%XZQAtW=4SP)tHYPa?X6m}Y}u;d*;})&KKk|bb>+FuTemK)`T42p zwvLWYk?oR+GiRRs`}=#b?Ba_VMY;Xb=6!9v(nW7?nF{_(K@Xn4x=lnlU zPft%j(7;&p|L^yl8ygxE54XLv_2Wpa0M`+bd#goSJ0`ZYu=x1;Zrrr#)c3dzrLV6UW?$1ebm-8P^>YHZ|M*eS#v^&?-{0Ri_f#57 zN=hC%eq2~gtZm{%L1p*8C-?vVGdDCeOi4>CdUL~Yo=xSU?Dcz>_3c}_e7SM`zdy_G zHM8IRJ~c8jGBGpr zB;9F`6F4MS#_zA2w_LPXCw7+zH#hgItE=5-o8?N_R2V3`_pu~=d2zAi?X9WHd}lv; z^=el7lia_*zAjn1^x}@fWUbIuF8k~LcJ%h@8qD*Ws+KHpPrgJ(zz1JaNW9fJD06&6=?Ha{G_L!Td_r>)T-pgfd>bh zE1%CT_p%aEbW!vyRBI7fdSzwsa!^{>o_{~=`cz>j3A>rsM4UQ;GLCczmb|*cX>V_@ zd8Vb}=!p|E!OcI$i4&0FUnQRHi2_beP9Hvf(t2sY)Az*H{-pOm74u)7abG5bOt^Ud?Af=i=cZ@+f*PPA zCoWyubmLdlh6KlTZ^PC`iF)rBR`=7nc73KixXs#Oq@(Sf7@c8aJ6-Nz?mxRKJ=;Uu zE@W)lGPl%MMm z6D0L~vG9y-Uu}~2+b`bkaC+vcH`mw8A8zMQKQTd3!|Pk&a=*EYLRW{Grf9&pE*|lM(x8JW@9k)DocUj5%d$Q{4>c{$Iy~D!9f|vWLs;aWi^*cU6 z(Ye9i{jG>=;-@DkEv&5{KYHX85D+kLhLl;(4DDqy)Ai$z9X{-Qx38<~(9xr;4!swz zU;nO}{^O^}OWSnsX*v&gzu)J)_~L^{j}G11n%&vkn|fh^hjNIBd7g?|tauBGZ4_sQ9hF=lLfewpx_U>0Y?~oZb1#g$ozP?5UWzI(+@7kH_VU zzq|;vv9Xb`C{S>8blkCf_v9HfI(F`~Tphmt*sWW$f|vV!d~#AaYI~mVbiG&xi^@+b z4=W}mtP|VECba8VF+MXBN)YJsBoZ<8|-NoVS<8Iu%xp&K* zeH}{Dmfd}Q&sMMB_vp*Z%P~6&8qb{ZVPa+m<+lI7uJ7;Y=vWZ6)N4)L-d!u4KneUv z@ESXznad4MPnPjJtc5=iv!g+64KI-A3r`kQQ3W-)cc9b?jJsWJb2^A zjI67xF7B(X_M2-pHQ%S+JmCOC=@#AP7Tukl58LJI9_;;oPdnsXm-9rw?6j1W36mx{ zxw*0R%iAA2cdjqK?q_S=-(QRdNk=%g<=zgPq6o@+O7a^yHP2s-TNh*b_5HD9$5K*K zo>cR?21e{E(M(KCY-w#3l$2cAHz(6%R?P8#wZ9`p)Rr|evu~>WoVG4*Z6eyxURf6_ZEbD6X6@R{dwVR+ z%*RjPi;K!eMn*4gZPh+K zU0>bSR<>bv`1(n6=J2HWXS=z%O`1B@H6UPu6>nx%)+)LG;kmcBscL9+oH^t3?*9Js zpiawcyG>13*2l{yCnrBUJA1ibX4(6Db9a}&7ZMe94G9qebuN^Y7G1cm!{u6b{$?{f 
zKL;P5nvs!^w6rv+$~$lWKjy{f@O3dKPfgWk+)@0TPf=0PRpjMrRZUILqJ{0qMThp< zO3AAVb6?b_Oj`P6@5lQdix;LX75Q44o|YygB;*tkA@T9!$Cj3s2~(!Hl$4lEn>Ovw zx7+zOUoN_Xx^L?K^8(hzSnm7rh+E1eqoK8x)z8mQqpPX6m-q9tv&u?JP1ob9dFAEh zlai7yyetV>71B9jg21lQ*KW;hyagpCDaU#wv+hsT3SAVm)XOB}0zS!J*;aw>kT~Cb|9m{g_ApB)|Qi1Ly7kuelezt?qACpRDyU zkjUNL<%Wia4{vNtJ~PjDwr%w{o3BxMd3l>S-u_qT{`XS7@}mEzN%kjuLB*_Bc3jcC zYc@5yeN(>Jzqd6uH(j=Nv95NT+3^z^|5qLpKk4n^!O<ebq6YHB9g*Ln<-k6qYXUCzcU zb)-kqIOEzH$;EEHTB`&=qYQxz%F4WZOTSf3KcnFl7ZXU|^K?$-wSw|?T;wNCMr^|pWY=P-qZ{-!W`dITxpgr+oBWms?_aZK&4%VX`F5bheP)(v_l6Ay&(6&~ z?Ji#%5~9eV=(Tc7B*TSH$i%J3UQT`_jJ_fuqYKmYMR)SUCLs^|i2|z+v&l2{UGJI5;^mnPy$l z=o4rWSn7hmUghTGJb3zacirD#7dIq2xA90eElQCue|1GOYHOD3DfKYVy_aj|TJ!tu_E)91wr0D3e|NXAs3_^pjg2X(sfMMmLV|*VF1#$^ku+lY zQGCTOP(;mtUeD&u#(H{sEUc`e62X0^r|Um{_;6vKyQ}NNM~|2qrcRw&^>@k?5fiDi zr+qdHEMl1{;;!D<*!bhe4~7TNpSxdQ7dz7=Qz$Yr()HhdZ$+1voBrnTxGO!Xm@{o= zcD+}XTKoE=8z0B;>@EFPd5)Wx_vxdf-Jmw-wYAa34-c_MZO=Po76hs3V(F zPjhf{pT7M5+S+JNZtll#Zf-tu^ytHP@BDU`z5Q~me^%B?d_fX``cTcm>mKQ zPft(ZoN`j=Ue#+|v;2E!Rs=4-vA@24-sdp1a#r@0wj!*Y-G6Du^c~fnJ;kTaEH`cb z9q!CoS;5d{ysHvv#f-K$L6%N z7BxQ%Cd3x3joi$}&E0Kd`1ao3**!fyG3!s9J=@#LEpAltAt7vi+*$ScHH*F(CmrEv z=u>E%q<{X#3g71JtY(=~dD{SN8s1@9NdsS678Hg9^BspHHVV ze0hI=e)+vh_xF{_$9fXa%rJEL#;>lgzkU(l*QpbXMU{#s?|Bg}_1P%%pWUNLmp-+2f*L{6&**b0aZtH8;u2mMLWM`lL^78V_TU)c&{Z&*{ym8~k zisScp6gGePQnF{yo}#Z`ukUMaZqB^1Vd1T9RbR7WVq!LI+H^{I^5VtG^CGgYu5$hV z?=L9%iE4+XytuHiGH&zAm74qZ?W_5AGyUhYr5n5+S3gUg0BR*`O1ajZkA(E00~Wi1 z2HIE-=FDX<0gZ8edV1Q^ z-=F=#tE;P3)YX}9UY<2^;>48HR6!AuC4SDcX3bi4{D0`}ZMmFWTuR2qlYf7I|M~U$ z{m*vwuTl8=`ug!3H)d3Rezr1rR@Wi{LBWOpYJY!wYm#$g!-eZ2P7>Rcbk9wiJo)DK zeEHhn-$K8yjoRA9Z~te)GT+%owZBSAUj^AnM=CLF%e{T<&d%a)lWF_8xVe+BuZw;4 z?c3Yiky|n@a>rjkeY*Sda)0IvD}&YLY$`q+x~_B9ZR^VJohvuqGb;{GzLy!j{lax4 zV`F6vjgIg4s{5Du&c3v>I6Wvh7}QhBxU<7>-MV!a*4Cfj?S8-Na__r$?|fz$G`jUl z1#Zua{q^M~b4vfW?5$B>8yjS5e|>RWef83o%wVgM7Y*yz>t9_RUcc_-%gT@`u_d<^Di$i>(`!S(%07)(~s+!JzM(qwY87W&9w#% z#OXi1wY2b*;`7Y2hp#{K@cjSzo^f$-@-3@do02k$dwRI)iau5r2@44wdUUk==FZ~h 
zo7?`L(&&9ZbGqZFMf0Dy?iXJh{Y03Fr$a|D;D;*Hy12bwJByxf*t(Tf!OU#hG~MV& zj~*TBku<&{;q2t};^N}=s(US*!dGVRd46tg_wL=+=gyr2jra9lJbk)bGkDp9hYt^C zuity@;o){qZ|}#G{p}PrG&tt^xd#LY{QLLMXNCcz!u6)p&z?P#u`20!@+8HrPo}fI zo&D@AQ)LSaiH0dtMA&$xTo$|azPP-cUsF>vt7dQ2S0x372buYkjn1F&INY`HY~+_| z+B z8+-L_>gj2Qd3P+@_~p-CkFSqiQR6?uz;P~@lv$31b@{s`>F4J$q@<)2e0t(p_VyO2 z__(pZ-oE(xxwEE4(}JHcX4RFwxzV_Oz5d#$tx4C`M3%g`&}f={?ZbzM&NenSpb_2s zb-#5*wL%_TTIxN=x?J!2>lGm@h5YB)DB9W0tN-&@zO_m=b>hT{ZTGCI_j;#^sOszM zOR5SuiC9Wn8Y`^scX_6HrfA8M^iOvmdhE@Wlasr$CQ?{gS-HeYHZU+SW@izrwYBw~ zyLTJkT-2bT*YfpPR;}URyA6!YS1xYbFP@%#eVwYBTAN$1)XL+3LUVF-LRWY756)zxL_V#aFIe2`b&=>;HzZoA!OySuRf;!4B+x$K~ReSlryEkrF zoVW6Rvqa0v${fnp)&KeEK4;DxE}Q@BgMxz%&Gr8Xw>xntdQGo!KL;A& zn_-v?>UM$p_zDUQ`TKsh`OUR@`10k+TU)b>|6UT-_nXF{@kfZUSIRV~pShr{OwGVR z;6$8*NqciM^TUS^Z7M&twDZdsJvhMl;Jnh=YuBbNS)#Hn=VnmC9#QSECl?pHSKW5& zlR0@if4}eTZMj!#V|SOOzPz;b)!v+i3l}nM$-ds_JKK!$rgGi$b8~~w2a2@rE`J}i z+;1+^V?ig5K&z+GbI#5%Y+mTxUi9}@>9;pG4}ZU3e}0;7v_<(lncLg*)fE*Rd8N%n z($_4V2Pp_ZjjW7}6;VGxrRDnl|D?pl#TPAJeDyIXVj37hen>mez-W?vjfX*6TH38& zu6OfhxlaN8mG_C&R)%)Ygx?Z@9*#9^Q0_5u|;EbiY6~}&)#<$2+|M{)lir&j*|`~BtB)#9?&Wi^`DCm+A_ zS$*e4e{ewu8jCu*ZHk+V3yX6*-_y0*?>)M)G5P%6g(v3g2!6;}=0E@3MCAE-wvqem_U_8f%JK>c5i!rdcjQLI|Jp!s59ZO) zC((2M{`q`fMNRF}+Gz8-zhA?}^yALVv#tK|zW)F0*O?aJF`SQ|o>;HV%*i>^!YM3a zmLu``+1ccupPrtXZ$JOn*Vp3dHDJ?Hz4jfw*(t34BF~gX)iA=y|b%S z`})5vTTDRZid&Dw!Cj@VyKEdlUhmkXa)09H`St&1nr2^n@c6NFN{Wh^nc0$M%O=g7 z={a35_Q!)}{ww?w#lVK@yYD+1`Sa6LN&7k*(9EB$?c5zZEY6%go65-xHce?te&M(9 zWy_Y`xOr2wR0pgnHE!Q`$jo=gBX!XHHmC=vxxpfOSBWOSoK3|u?)aTWtaJUA*F{D~ zK79UsdEDm2!)+#+msldn&Fq|fe9zw8+zgsVss8?M&cX6|u(betf2;;|b6Yf2 zxnwM7+t=G&Ul*HvWkukR-@hNfcyZ#(%ga0K{#Grtwk&wS0BU_5JlOd3^mOCWS0SgS zYO_0BT@^atz6$Kwpk)F6*;yu1tHRg&eSLKmG=^}nnf=S_>;32ET0egMI{Lv+XkVw` z>#I;cdAl>mB^3Tb4UZ__>^D?N>!X;z*`*FYhja{YMe0+R6ze!6{F&M)T4E zNS0p2wA{3vSNho9-Q_nor}MwPz5V&Qxz-_TA~sqifme)Z6m#jEQ&LizVV*B1CME_N zI-6hj%QN;lcqYMVPYkPA`m$xq7;fCSQSk5(tB{b;j`H_$6WP^3<_C(2R-C^%!!Wtx 
z!vn_8&(ALpUmvHUs%n^jZ_lF+UvT)Gsmv;zHg#(2)TyGN!L-=jWp{QIF5dA-2oxq7 zljm8j%}hyYS-(EN@Td~l$Y-Jz=OdT<&5hVoVYog2{%|vZ{cr44fwVW6QJq zkr5FmYBN9%7I0m1VB+R^w$+O+W;DEU%{+Ygu%tnP10pyg=J?Jsd3nCs*QcPMU_sj1 zS*}u3KyGr03{5Bu+g+CX=FJ<8_XH>b^iSMxBeX!k6&;xqocdK`rHL@-QhK#se7}icG!l5 zgG^c1*ZID^we{zl&F6o7yPeOyXT#*l!inI)>cVGdB(*|TILx=JW!O>s+pPNgyQO7s zZ|VQzIt3aLZI`cGk*6QGXU5IV>Cdg)mS27vS^w_G$H$;1=3GDb#csVb+w?W!HNothYufuhIm1P;WMoeMJHoJ{omK|KYsmMJ-dZl;#1S}&OXnP@k#V@4f8&-d zM=o8O#Lh2gkbJBs{gsM=fq=Zcyh3%or@Y&?m4#tLtpiv5)$W1Qq zw=yv?ftm)fyGj%-EF$jKzrSbu=FOXiH|&;D=6P$jeSLAUxm#Sn=;^7cHrH?OF4xzM z-ge~b)v5pf{+>K#%8~H+T2~(*o_qK1Ra93$w_NTw*UD$M8Sh-b_Sv(g_y7NA9lN`1 zY2DvnYa%u>F}McKu&FdUckbMd9XkX>M3&5(KVyam%4pom>sQ04)cyPUe8=wHi?3#Z zD(~Q8-4+3-JziWfUdi9z-PMWM;Ba?c$V#EzWp9)I{rTDYNH6 zo5$slmZlc9c3R+Kw}n@;cGms10(Bn!{`~#>^Uvq=7c)$BqPO)(nPxG3cs4sfXn$R8 ziIwcTckfDGU*k14Hr9*Z=aZA8V_W?#<5BI9y8zvp$&W#FnI|W!`)h=+lbNvh+|9}A{#m9px6@7Z*xhj19yw`gSlaCpsosnR;bLS3dTDD~V`lfl_{RMVb|Ch(bzBzmI zW1O@?^_A7t-{18}8n@Y2f4i`wFnQ9XNgqCa0!`0dSryvd$jm-z=FF4R;Z!{4Wl-~*)8V&V z*}`JR$H&JnZ%lS)`1kw%|D>#}F2Ci6FJGQ~u$i6Npzcq_xw+QUm-{CtC-2#_=f=)r zb#c9z2l4-ZiBI@@SkcaI-<7@YacvVO2%Pk`Lz;!wlwuW2cW&cZn0&l%&);vi8P>$^ zp7!u?yZEl=h6aXGuggb|wpM?Cw=x$r=l130<<_cSo40iOdQ?99#&7qd;aIQq>hJFy z%j)#w_pQ<6nlod@iGKTkCth4!-1w%N?Rc;B@fQ~tuMAqs_3`7!S3A|z)M|b_Y_Iut zGyRpFYanP|pOKOA{GVAxToua7%3t4G-j#Z)xk-%l!|( z%swy48@;#6bi&`r<+IK6pZ)py*~QgWP*l{lpkPB(#NRKM{WZM4ZF#vh`?`Ud*}3B4 zzxMxs`hRC5_tut^->pF1*|W1uRa8|O1!QH+)6I=bmjpH?6|X%}uK6^_!P# zH|xI)05v5N6+Z2Kp7Z+J+8Z}-K79N3?2C(wL3M2F3`S9K(??(7{-ZZ%&F`xi7)$`o z!MuIjJAc0X_jh-b@9ZdSjdBSCH?wSZGtIxYWy6Mq4-XE$xv{a?y8PXRHIbWRUMGQ@ zxE<4%2Y6>bSrM{QDEU~=!vl@XuA=?m2GY!LKG1%fMigX0hZDg?%%DylS!-U1nJx6` zdUYs#T}*+w|O%>xTj~cD5d~ksA z)%ErL&CSfQyGjloQK>#~?(*fy*5&U++&PZAExu?_@*<$rT3NYSD|FR`#qRyf=b2Z3 z%K5FhZJEF)tp=+t>Mge|Iu2+gZ2EA?&F z6k7xqt1n$9BGr!bwpuXHFv%2}t{<;vVlrj1d;g~D@9&hl{5TX_G@`rS%-bvj zTH$topY8cCPnXZH>+0;}bZ+ChSbgu*RBccPm7)CI9YqzDB`ch3SIz#|?^!j;ar!B) z5Vi`RFH7xj6g<}9`@dp=-1M0RIZj3f+`SaNS|dH8&O{j=@&>y%ARr_PumacfKF;Ts#1 
z87)d*iTwNbZ^`oI!HeB^m-);*v@UjcM_=Eu!-tu(v$Ln^L@wGlZL+%mrtIr_Y3Jwh zIz&WBI669lrs{g#7T5W&3R?^6e;AmX>o0rn7U${dsrpaO&}Gu}Kc0Ee1ylZ?f?TF3 zZOgrFwzT;9xt-tE#_kUL_~lE;q{)-HOM9Q4oh|N~(faNF{r4KZY45^hmc2Ri`D($j zywvCyvR@;(xw)Udxw-jNbpGC@OO`Cr@SSZI$|q;DqU8O(*gz52nFfhXM%CYPTmwa( z9%y9ty1dL+6uSC0YHe8MpC22G{{O38ns#=US4qhhAxX)VCYhI90!3UcD?TiE^z!9O zjjlyLv(0>OZb)oiq|x>1!9nJu7cT~SdU*w1TNk_f%G&7g${!yVPMS1n(UohR(0;i#^kGZ z#Wocm99BkeUsv?^*VpNj-#jn*@Zg}(+xr1n%1Du)Cn6mj9M)-`onQY?GCn?@Pu9w% zl}mJ9(XxjTwJMX2XU?p;9dya~=eq)xy`}Yju~}QA7A;!D!NKvM*Zkgz(mxZG-G6*M zF3C0)%}uGNm+#9H78c%B`ubR(to4iQ z>*ekLei7ccZ(mlmn;V;*ot@7di-}cVU)g?*0+q5Fa}ZN)_v?PI-Iaf+g%dQWd~U8a z^G$x%H4z&ZX&kOd1T8YUb8mIHzMAhW7Y`4Pj~_p_zH(oD(WdnGdJ)%$$K~sfoII&{ z{hzS9U%g)-)N=P{z$osUhoPB+bpuwZhh{*w!m) z%;xUyZujFsbNa4jpmORQdHMByj~HBBTnc`F%PpPB>#To<;mw-)b^25H zeVeH4Zj^mZCnhGQ=xzS(ZMxFtc`Tde@7uC<>(NuExEdZjNC*lJ?(FYZS5aXpFaKWi z`^~Mb+TPRkTmv`BdvNgb)@Jq0n>SC!wrWa82ZzG$f_+tAwHRRI<>#~S@3RH9HbJ`$ zE-Y|-^-t0`ZNh{J4fE&U5B?3Rn~rRr=+B|ig4Sp_yLo@w*;zhwEDX>8ySX{N_}LlB zEm69`%l%UC?kdfi9k%-D?c37+{{Cs_=6Lo>nSScce0FB$lI6=6U(L$Aw1m?*{anqf z>=jsB5AFwcmA-D9;HsMJ_oM1aP1I6sa&VLdgOyz5v}N6`7Hh0o13rF z!~TUrYoSfo%QRnDoj-j3eE;!&dGTLAj`z!7-dkNRWtQVHSm2tFNzL`gexkT&s@{4mOML1Es-4jkuGKaI6U} zTE9FSRyUpe_vUo%?{A6u|NnR=f>vi&i2&1 zySu;2J%9cB^pB5^i_?Je zzFVSn=gpfp$GY4v*Sh+f4ydPo@#4hL)nSYRqN1&vHy6*FF=_H-Ny{P?IXOAd0;>lP z5*Ysc{rmFvcKy4%N%l+lI z-e`N%^gh!5Oe}|Di$Tizn8Iy;v8}!FS@*Y@jaSLQV8WFvA?M~=yLXG}UfCA8ISsUs zskoMT@vXKlT^CjNH3?+k;+;N^ZR7f-Y z(26b4LLzSNQlRYW{QZ<@I?4?5iJ0G8qI3fT13`lt$E5S0WLXzI;W*sJ%e=t3 zo$tu;BPplm)83zgxR4JtgJAR>%AloPyK29@2rLybRA*s^Xt18fB$)_hLbMEbt3=Y0r^ZCc?_4`+S z=gZm}wIwQd-D}YLiXT6IIDORwZFJlf{_gSYB;lZWJNJgKj{|MznlNF4(|@7s87BUB z|7p7hf@=1{qM}n<{~zfTo}wM@ch}6!Oifvt8Qd-ZZ0vuWSIVU0=H~Rv%Y3Ep+_|$N zV&kG)W@%?6KvP~1n)$!X%g)Q2mtNv|KikOAP*Gdk`>vm_Z|Hv+`#PKD{_~fGt&KV} z&o(-j`O5W~pQX4b!Ka+3S`|%?m*bu^Yt}67^G+O!M^?X?Ds8!WvswK!>^0{5t(yqd zn4#|va-r9kzvpd5>aH*OzUSN9+r?jBg@&z-nwsrZe5^-OMMdSo^XJp^=YyuLe=}#? 
z-DTR?*vOE@#~>pk!@v`=VPcB%iOkYn~JKc=&OSr%lA&3sO%1!cfY(X zH@Y;wt}^HPI@#G~xl9iZwQ}!##~UI7o}k`a^%Xoptr*9;KBmxYm2d1B-`=m=X3m!S z?00)t<+tl=A}4nWt3SJGU;QoT+#Jj1u+^!ruB==Wz1=VOa@N+0>gvZYU!FWO(|Bdz zVz+&UXVIsY{cCwcSA`tBe_vi+UVclIu5R?UC!e04cKF5-u|4nZCmR8Vn~Q^Vqqn7e ze|Ptl@9*+=cOG8$w^y~XkqHkEFR_xnU;90FUh(a%+3w(}aL_`rn!jJKgN8n3W!K7u zuMX2?db#V7pnS1x{wY0ad=4W}bqw=#_ z^tPPDv$ITJIWsQzpMTE0|M9V2Wlhaaw_d4_C)MX)*i%{jH7fnwoP(Dy3nwT4eGlsV zJ_=nNSX37+4Qc|E#;%^T+>}4QTUqt*`Rem$e0wurXPoX@c-GM9R6WnK z7tj6u4>38u{OqMmla?%5BHpv^$KL<{et&s&l{-24@$+-unctM&Pru}c7(EpK`!8y{ zO@DaalQSo|{NP}-P~WDC*queLwf-+ktRQQDwI{!yK2g~{XlqvJoVjza*8bdUFXEc` z=tw7M75Un*)9?5Hj|)l^x2&z4X;W#$D{bbo`s$_S{_=8mH5?8hAtKM7J$v;#?ahsi zs-k5RFHfJo{O|6_9R-TKQYHsZp6s-({x-$s#8IdCI5s|+70;IV&9!<_VznS=Z~Uj< zcmD*{AJaILaP-I#jr4PK0!3T{g@lDg-QC?mYg$jOiQGKpy#4<@EsM1y`S|!wfnw(E zY@5nWn+hH}xdw`)K0MTV3bdwJXrW|FPY(}hgxM=3BxGjIei^G056j|bJ(rev3YW&$ zz3&Ij5x%^(cJ`#HQk=X!q-dvZ?;Wi4%AB zX`lV_RCRy7IcQhwmZ-IpX3bg!+P}9leEqyh6DJ1Fc_=1bTlsS}X!+c!IhMsDuA0&B zr~j!l-+yMlp;1xV0nj#&(o)t1Nk_X(vaWQzc=1B0?^FRRJ3DBdCkh_kVY1XRy%1sT#GFC;bZw44T<^7sc!>y0SKUIvcN4!KWuDmHIZM*y!qaE!t4| zIjyFqrpt2Gr0MtHKb!v3*sW`GxJyWgh)3$>vkVNJ?x1r47A{-}nhtA7Jw0vV)htQN zqLi7l_c33{Fe!O?srA{}*_(@=dNKU_zW;w>dOEwq*H>46-v9qEe3Ig{6$cI+h_N?0 z{p&aDr}}yqclY4cVY**;%recMcJn{W+Q`jqpv7-@`+9noL~c&ol7D~R;dcJ%2`Q@1 z_xDRz?oY9<`C)K%Rp{Z17X$Bxi{00+|9kh3QT;KG!f#w2US5yZZohZv{CR$DZS7;d z(#OBPzRt&%qM=k_HX+0@5Fh;$PVKsclNxyyFkOD|9_sZ2W=GW?CeyPN&Psn-rQeY zKw!da`-aqye?NV{f42VXBKzbe7RCiHU%uS)^;$HKq*2Sh+TTn8YoknidV39xjFP^* zxVW^;<8S7hw{IC&L~Yfo{q^PHlarG}R)_h@YW4N@F3q~SN>wtc^XA@a^L6Xig{%(a zo$%ee@6SE?|LgZwb9b)Fe$4PsJot3hnIN7mliQY@>xoJe%v`DXNK;#h^`tYq=ZXby zmMQqQ8F6wcy;9Or^}ej5s_h%7qx|7OMwvtK=1m8-_~;s!UaMBjn|1y0_ivT6zWm=^ zbnRC5&ofD9@10*`Tb=Li?Y*b|f84Eihuirt{h6h=E_QcY_4jv*s;aJ53nU~Z6Aw1A zGW>dfJtaB0dDg61CwYpUc)#y_Si@LVT|IH;%uZSBGLO{Dx%+s^bL3Xn@=P|CRckr0 zE_Qc9dV0HaJD=y%{qtAvD}LU0ZK}!TgWuoZzj*PY;@`Q`r?c;j={s=10W{*yFlo}H z4V9mto#ZLL^yjzq+miYE%x$yf8yg!v{QTMkn9k3$ZSL&sEXhi;s`&9iQAk*L;@r8s 
zpcdMn&(F`lc>S80;mY;vi??kPyLRo`WYK3W((j6eB_ukoTnQ;DFJHWHA>-DqTMJ4{ z=dLkOcde_l3tJPxsI9HNGRNxd^oMo( zv9D(5hfkk`)~;PUS?1-D*Kgj4e13jjJU2Ht=w`_r{euS&COtgVnsls3^3bJAK{5Mk zDsTK;crnAJq-4v6ZQI0(cE(JZHqEc-%j5JFg&Q|+6buUsyJV-PrRBw;=;G$qwqg5r z@xH#kLpN_mrlhC)L)1yWEM6S6vSsVmtsAy(75)DH{`?~+j$aRWtkfcqn3(wJ(b4Xx z+kVS`8r|7gsq@{i#)&~tP;esO!%SZ#B_$=ZP~J=r)oCi@qT&ty1IYzM-8Uz zE`P6NXD7!XATG{sYiqlw^0V72`|@`(U%q?^2n$m?x*|a1!ILK}txk<=*61+&^)D|d zFL%$+f4?GGY0-Z1$;L`j<(t#byOoxj?%K7BfuUc{_EP8WRmYDXPrkg&ckw%QLqoxf z7cXAQZgpzBawX(tX;qb#jEu~dTOwj&PHV%Im6e&<`D7RvB&4Jcoj%Q-nVGq`ue0M{ zoX@$A45uHzekrM|vmbb{T%zXZCss#C#{jLV5xdKDSFKvbz_51h+7I8pwKX?0gVyKY z&p%!9_gCqTUAqpQJPBHTcIocP_Dk2Vw}1FhaN=o^h6vXj+iEce0Xez8o*telQ>Pw0 zdD1iLq*O&qU~H^xO-;>)tgBi(cJ6%m_4W1UU+Y$`1g-u#bLPyFO$!z%WSXv7w~kLu zO)cu{{_Onx_LVDFhTWbrWy(bM$sNl~)Y4r{6q}lxC(fGHHQ&B|(W+Hk+1J)ge3g`( z{P4}2o>i-~=GasUS(m?KVVGl4$n^U9`s3Z=`WsSDi|wiY&d2cK)m7~!OP4NOvVfD}x|IxFvvp;iq^#Km14MX^{VNk=67q?t__Tj@BjPz z`{DEF`S0Dk=bSfv+O)Q=F0Lm}pDx_IS=fKR-O+DvZ!i7z?%g}bT)Wy|EXBp29iK9v z%*e~r(C*m88c@tT(^!dE-voIjzZ;UXJ#_j*49c$NiABun7R1*IoWTv zf)}qGebUpR=97@1kd%}Z5E9aI?b@~fyX6&G4jnpF@aoFS|6BI1T&bz0tvzw}>~8Pr zdJnIyjowl8wCkGL)vH%Ky1ShV3k|<~{krgChC%*4n-y1I3I3jEnk}}k_P5!JJNNI) zyZ6a7c6W297~S0PHa90HAu+LWx_-PD|Alw&-W@wL)A;}8YUM>M+n1YmW*l;Hb5k-i z6XR-iT7Bp2SzjyN$dC{gV`JkJDMmYX?|yu&SDK-LonP+3>+9h`T$0zyJfPoAV?iu$NcPQ1A()x+PP z{la{Ku&^+`SGUid^Lu^j4CAq5$1G}pne@q8n?0IgVr~v9KE0>uFkbj?ZEb99EFmG$ zbEB_gncliNf)`shZrU_yozI`xm>3x)W#xq-T8TM1Jxi9TF#O%`%FW5y*xbx~ZEbY> z(Qa{OhBt5DE(}`v;O$%9UN>fDX6Be31&&??|JJTt$tfr(cp<}t!64(PuC_LJZ*T97 z&FTFi%fePqoHVJa_V+i(r9lgqFK0h^@L)kn$(5|v@80ztZs&LAP>l63P_VG5xUuoW zhYt<=_SsccRZW;Mfq|Qwo8j+r>%V{gwB@?H1ka!Mr4h{!q+?kgA z`r29#508emYu`$^xVh9T&%36gB(-$0d;g>3{ql?7J-@y_o`Z`kDap;`Y~fQ#30VN8 i4RDu~$OY+t1_pnZqV0LozZe)87(8A5T-G@yGywoWIUg1P literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png new file mode 100644 index 
0000000000000000000000000000000000000000..94a6310612828db2370d19a094795341478e90f8 GIT binary patch literal 21380 zcmeAS@N?(olHy`uVBq!ia0y~yV9H})V0gyC#=yXkbA_{?fq{Xg*vT`5gM)*kh9jke zfq{Xuz$3Dlfq`2Hgc&d0t^32kz+hS88c`CQpH@EaktG3U+Q%9zlrzkYl?|8PND zXP2eo1Yr)12Z3y+>L&z@56dJKED8wBPn#AvD|1%bjkU9v#r1Al5_B-T?&v8--xFKr z9OYzx)1)~;v#aS;#kKeR_3yM9o75g$eEzrUW7pnma`X0vKf4@Ny?$x1mu8EA6NloF z+Vg96F@snw0;0;Dv%##+2}gW66k7xixp+KM11U>XQYjR6;!tcc5S+AwQ?W(BX$FVq zofd(qN~WfwbN$>C6BBp-){EZOlXrL5-K(X?`eaY9iQN3|RfJ-TfhkBw#>GXfrrFn? z9BSo0ddj-wMT6gRWmD6wRZPGCPt%L-nmk!}+O%mVnU`Aj)&8C|cP{UNNvhs6%yOf) z9t)qf+t*E4NNB^>t!rC4S_Jr+Ko&D>$+yZe|nSXk^zH!nK z4k^altDYG1p)nRLYJZzWulCds&;<0A!+Kn4GK770V z{<0M-R@~X~?afW+>3Xp*E-Ykb;FU5_P*Gv=_V#9ot9ZzIukLp)!@SyWk*}_-T>N*& z%$XZ=ZW=N4$=mm>U$4(AWpd!+#fhP-!=_g4oiRhg#>S>=YVDto$AA3(&Ag%RZ`G}B zx!p^bs+yUZ)%^T*@<_dn`d1VTMH73HeV&NdI9ZJr;N zduvOer>Ezt4I2V(-ng;i%9Rk-6;q~&ES(Z`xSc=z-@kvaPEFO;QdM=ew6Iul-u{1# z=F%&Vj&@I7ym;}izu)hNrll?W^zB<%gO-LyfS2aiJH_W$ZrETjb@JrJ=jK{3x3RUo zI!V=gN{|=Gv#YYNuUmR-TkdVMuP2oISG>HueD&Mg+gIPYb0_JmqM~Bft1By2`=a;P z?cKC(TiE{p|8}1`c`|U@wr#Jv_4fs^va)9V|M&OR#^Z9q?)`GQg?Fy54iBAcReI&* zWc95^{QUe|i=KKdof70b+bs0da=ZTCp2}6h%l%%RnQ5$LZXVv++PZSV0)?ehf;2TXxdIlu z^#)yE7aQE!*|{q1>?|!!O;63GSJ>ri7R1EFT>1F;_|%ITA)TF_uMV|xXI)i_N8v|&R){hyESD-F37-|>U;&Xyy&*VaU4eR^^->u?+ItCh>=1+lWS zMm-9Ph**(zWksOQ&B#qDov&_gULNG7DK9U-wc=ya+Purle4`$HJ1$?pVwPz(C@eF- zYKN@}xOeZK*UH=KmR@%~4*mN2dikrXtC!2wehK{gYW4b6hYmR{4SCslyHi*_WU*WC zmGk!h*Gvi0va*UQeSK~14ZFm|#H?dIl1tYmEaQ-YSN=lS{hWu2a;d#kLfYL}6@c{tb~ zWzWt?uDy5paO^l_bHw0fE4R3YhQ@-*&(ESFKR-L`>Fv#|U~4ODZM{3G)O@a8?X0FI zrZsEVu8iMr_x1Jl^fNOI|NMMDU$pD>%jNT*?S8-S@uQ>NM~)ralylR_?*E_9LErb* z{51Oe`+NGgH#ct`XI$<-KkfRu*jrm)UtfRz)6>&GANSjP*&d&u=)C3o->@|i2aC_! 
zy1%`(mGQ^7+xe0E>uk@?Hea58e%_y7uh)z2YLCvnv7zzYT})WTU$?_JjwXr%uM4gd3SeR`P|mh z^5LxceFhL=Q1j!1^*(V$m#dSG_;M`Fytl_vH+oyj-(O#OWGpsB#2#+rT^YETZMpya zYp3e||1HnS$zj;AZQHXmGmVeF%GQK$=lXc1%|yEG&CSEBzim%WPd}c&|8H2<-3^I{J9>Mc zJ~-GcV_nwsd|vgtzIU!1fnPxtk;vwfmqFX|?w(rQZx^*DX8m$H@yko9Q-a>y+dF%H-LKAbbFHV}D1UWjWz62HsXsqI7uSA& zeZ9Q8|GX!!*Y97pU_nDu6VtahHxF-2KJK+Qtyjvl;@{8boc#RHA06%f@#`1Ug!%J9 ziDqZbPb1T;D+}`O?wT=k>fiZ+OGS3>+BZgM=B2N%r9L?^ zF=l^V>}q~a&W9f!9yapPYU#YNa`xZPP8^DIR=2n1Cg0eQILEGb*OkO5#Up=RtM0T2 zJW>n|2srTP=VwXNtSKtF(00!w#gaES3}0Pe@9*ZuCamta=3POHfT)V|QcxR7K+OrH z(}_dTr$r?XDk5YKYBg~PE5e#sE4CcbH8&U6*Vn(bJ>P$K+1rZebITbj9=3{S+}xzf z*38&^&><;FsUW%SBd4%hLVkY#?Ag-4zrVkJPE=I1wYiy@kB`sA)ipHi1lWe}rk_8R z@3h{Ubv5brwY67P2DdjgG3_dSeeANoJ?{bg|3CfN_~l}9{_{$kO_(#sr>MwiciG#d zJ39*B+}hgREv|1={LJUuySv?f%MahZD_dV*zh=!EE$tGJqdR^FE&aOVWr@|RD=V27 ztmUoP%_*0hoGfXW#9}qK@7!GL$y2Agy1B8o`^{~T=62k@bLYobtJh~`vo2e@G%-DWxomcJ zws%xi)Xb+~kMYm;lGb9q@cOHYn%bnPQ;#+CAxFCMdJ&U=-!`OFy~ zy_g*V3v0Da3wN)q`}^z1j~^QfAG?8CeGiUV7wGT%p(J1TLy^JE%xsQjv77a?H#axm zxN##PIoa9C$!TLze*Sx*>m1zN$~rnc4E+53-`?Ggt~6Ka^7Qb~*y?-aujj4PAiLx; zjV}k!&3*RpVc^4BZQZRl_5W-lBO@7N_SellJKOy6lP4~2ZfpgQwJ&YWzW(UZBc_J^ z`|aavzlwTkmc6^2dV~2%a?5-nAy{C6{cOO1~e*LCHVxaKZkslh> zAMNJGrl_bWA}X3_BE`wUu^@hboz2fDljqpi%PkdQYi?Bap2l!tdw%@2_3{0C_u6jF zzW(gA{(hDPj?HX7v(0$@{rwr1`Oo)jX5-b;&Vj^v*izNJyu3Uai-H9%Kd-Hg_P!gx z{PM<(i;IF@J2^XZ^78Io{t?m>kg6;WLhdbeWR(`*yS~aG{oI_S+x|8mU8csAA8O(J z{=`ld+=)2i{CIumZTx?b!q_2;1KOtD4ad+Y4rA|5##33YY#Ha^*| zAg%`6>Te&82>V~yS6jVj&z=+K&MjN9qT|Dd0vlV~!s6n`m;LRpp1Zu*oqt~CGf5d) zSxM_M9k<03Pfk`Z{`;$xhmF~}osae3_1nv*DB0M^h>MHI#Kv}Zc50e0n+PoF-0 z+uT{Rx>m2&ZV-FSE?-mNKQD0VnaALMPoN+>pUi_77Z;mkU+Y;HyL(gealSo!_Hgj< zsHmy2`TP5?i`m)K+RExZU2kdJ-l{h@H@ENEW3#L5t=Ic|dl_%+tv2`c^fbx8H^;8_ z*M_vSQc6ln5jzSLJ3Bj7*X_=|Z6+ivY+3O^L0m89z~#%6*wU=8X6lPPB54- zYu2ewVf78$wq0AswmN#d-m_=VjvPJu@oIQ{qJhMVD=UR(&YY>DuAcoo^Xe+ms=f0b zJV@A&dU{^0x>I5m zyRr8FuG;m(}1g%nSBQ}s(bUAm;5X0a*wX(-X>a|zt(1L^me!P*NwBU>EzD%GBbVN!+!gJ 
z3Z|x0uUrYqy1wr0%HZYyu5F%Beh1XoEsS3sxmnH7&~VS6Pp4z{*U55ob8pMJd1yu8 zVh>;6rOC(pG?r#uSs|ExeH|+();>NyUi|8cW=&0vgh2vBL2_=fxw*Mj*_#ueo}S*C zUGo0k+&6D>Zf(!+_gntBncr?f=H+EF_H}y>6|%9h3CYR%d3l}MTg$Nh!pi-1QXd&6 zxISAwO=nq7)RMhE{aa?foM^T>j+2uU)C;Wn|Mz=&iBGAXQT)X{u5Di6g4L%5)FN&X zc+~jl?c2WxzEybjqxa!Mq8?3~J-d7Qbn&;hw=a*~UAE`*IctU`OO{+%8La;1?ORSB z9u<3g`G!4vY?zptH*Vj)Jb1a^QMK*W|Nnm9xPANaix($?dP%T~W~-O?ovf>?j$T_E z4NAM6ot-&%c1)aWUH<9W?EFu!R1|NMAdKF~{0 zTG~4xKwxXu)kUeNr`@=Hdvo>occ-|bL4_5+;o?BUZ?kLne64S{oKrTF+w!N3-|jP0 zOxq=YpWT<`e^2YH?Dj>Alz7;fd!@~fUB5p4+S=&g{dKix&gR#g$pTkVQVf}ynKy3U zWK77*^V?hX^~9MoI${5=CxcRv;=84#?3ZuWy<)olsk{GOtiHyh=TEq6$~E@RQrf*R zXgTvmF4LEDk?6#v`)3_^vRPGpXWNk(}J%5-2#_8OA8waPbqN&HqR%^{Xz7|rAeR#co|D>r?T@w=(pFMjv$F^EbT3VVRCnv|E z_!-Z(ZQDe|#EO1?N|mdAW4QLuvgrPup%D=hiHV5^dL=vLul$UD=WgV@BXi<6Bil=9 z;41S@&gSds`OlpE>Sq2nT@yTcFXO#X@f-0Ww*p@N{QUgnsZ&f3etmtNb8nAidU|?I zO-;n!DpQ8Qz`!r>@5?jP*Vivvw1@#D2I^W^&E*4$EM9!_^l4>P)z&9Zo>)hH2?fQ- zqD5<7NB&v+>r_~#ZJwm0WJ^m+KuAc-#S9e%g@(Gnzb*ssHqf9BLIjQErm=s=ewO>*8D5xlAOLiIre;0;N9p?Yod+MoGG~~H6Pk8+-_ie`QrTq=X&Rp{fet+ zuL;!xr5#+OZ3J4Bpdl_|2JJw`xGYu8OFuW~;PK<_+j4JTS{G~m?EHNG13i+)8#Zn{ zm|&1#AQ7>@Zg1I{-|zQ>S`2-CeL1(bcxvC@QK;P4*LUOgZDB#dz(_@IP?+4gen$K4 zk!R0pzFc%Kd2?gpn>RUk?%w_Q^(*UxtKsoazg~~m-&P+KBy{oOMH$&fd6n>(R@Xh3o6?(QyUPRP&qUmdoVfrE=n$;>Pa(%!ZyS@lGA z`;{vpd@>de%l+mufNI~oyu6mSwxXApR2eQ^y0l^Q=F5E>w{BI{)m?k02;`lPcHgC6 zcif#adGg@}j?FX7^W{LDhdVn8K~1H(e#f_DUY=o+DHLDx@#y`1ekmy`6DLk&m}6T# zt)-=9f307jp{1qer#-$MTc*F9Xl4w`MaM&KtqBs|-qggz#w&H?!a`?Bivk4(-&rOX zU%B_o`R=d#%dlbN#)oHS8b{r|xjB8~#*K^`oBxC*CMLGDwlY3=_^@&N_V2d)|l8C{=I)m ziAiN;<&q^!Cd`@BGkv=Fym|9f)YT_Xn&ebhXLs@9#UHOTUqt=^i1;Z?6@Ob z{`wm4(vZ$i`)Yqrt2}dMW$;X^(yW!o@4T~sM2F+d&fB0~`kkG{H#enn^YQVSWL@dF zb0@}co{i_*TU#ZK(|8mW6=%#m`G_0rh0uj(j=X)l{eIe=9fdW&UM_!8vMZ^y{@+h^ z?XWcuUa#M;rlPXsQnLtH?RL*QVe4WdpE>qQo2NZG(m7RV0;sTP(TLsoc+UTy=j%_L zJI9uAZA~PPjKze%zrTl9C4luU+EFCDJ#uqeVrJ&aZ*OmdY8*++B9+I-dRKSd0OexE zBf6)2@8tQN{CJq(UO-IjnDKcV=eoMOrR?CQrDB)1dEw@b8xzmWFg$YP2&i@b>C-0z 
zsoqdQNCm#bb!X@8RiUdteEyt#d71B(_3`t&#q~d(um87MD+xRpFR)#soax@Jt=W$s zKR!IuIDJdMkd#?Y#LGr-yS(GkDc?KGd}klqlzKYj?yjw7^Jn`lPkw!E?NqrrU?2XN zUnIOebamLr+xhzgH>aIl)!_~*hn=P@yi@sZ*=EA|PJLopSrJS?VbftCAND z-QC<Sl+ySTUcDIUDmr|g@%lbOhG|`f|k~+y!YT5#C}cE4UITa~@>C@V92_wL<~U%x;DeN|sy9er|ga%V?}f|63x&f@2r z5)ZRYnmpOl&kr=%*CTD-7rVQx@Z%%bv~zP#-rHM!@ny-5-Mb&Zf8RfK>ec&uNnc1|K7M2Iceiz@nckc{n6zN8UTaQGe-}1%9&(Ec#rcRtV z(b36?Y2UtmCK(qP7~Z2^1=HMHQ;MITW1KK;THD;YbIW?xuGP)V%uJmIE<l zZOygS>RR!BetkCi!b)#btCpMvw@?kInrRysRzIB@E@5A118R~T?~|3WD&gSf=AJWm z?$2MZ*H`}ec>J_&rK2NbGaGNxiwg^RWGn<4)O=@6m^iU<>sC`qNlBBuI}!}i($eSV zST2^W{`Mv^Cnsma#*Lca6wc4H?cTnyKZ*E*>9$-`Z|tP}f9d_eXEuoXK9lciGh6zkdshih?7) zt7}pH|9??G54Cc0a&aYHUl)62bvS=^cJ`k?e=1(D-M(eL|KmrGD*pfd{@y|k9NEF) z?~ZepXCLpAt@-=)`i@<@k_;pmW*DdQO;-0;Q&4DF>OK9^p335|wNb1N_xII;MuNY6 z`*vkr>}>u0e}o#?c%=+-Z<*NG+A_|ttKIc&VG}459yz?^lF;_NySo-n(c1d%xO}}% z;IhjvBerA&uKeBJ)+Qt+RptL!*oouggO$O{`7A9hLyOj07C-xNEjmB*w!h6smi>P| zc`uF4e{y2to4dQkmx|=x+LCy4Q|gtK!OJhLQ-A0G)bBuaNJxliAt-CBrJkE(d32k$ zaVGPRACLP%&G&Xb*+(Cb%cnm))H=1wwy<8yys)^SK*7dl&XOfdASJ?&+j;vk>?%Ga z%$YMs!m4D28P}Hd^K$bl9&v(tHPfey-@JKKL{wBzPOfkHa`n8tJW!E)tFr3Li-X4J zZJHM^R!%Kl2NM!rTv*5u^?rI#m#X))2ag^#tzNBdXlVHKSjpv& zkB@)+^y$*J;NalG(o)t7X=wpog@1p2tv1tcHqd>_H&xE2V#13T8E(B&trs&?OiZRM zSg=4SxEkcW?~|F?`3k~V2%ENpB-!otnVm#c@bi#d4cP}5TH z=|St`Y(bNEadC37yUUiAzrV)-8u4ASL}iwFzTbSi+KA0*yn=#)Ix#yY9654C=z8qt zG~VTYb00l8*c`K?fYIOIzv|12z%-w`yGk$jl@=8#+1t;bGDRe6TaKrX4^LxbW7OYn z{e35XeSMvIdz-F&?Uz7Rd&fe1t?kFpUb-~t;$rvDr}g(6l)sBfJ2$6u>sC`mMa4N* zrK_}(K(*4l8PlhWZ`-zQ#>|W>(asCAb1v@97&%eKKuKN5Mp}leblhu4*+}x~Q{q2pW z@v{9~92^&_UjCc412oIFWcl*PZ{DnVt1PD{T6H}4?ygcE34?}CVRfU-ODg|<-~a#V z&F1r$w&&kp)e*>{*s>#eYu43Eb=O~i6%iAgwsU<@7b~~egc&nDl9H4(H8rF9Z^yT* z{68m<>Y9Y!-*?_vwKvW`AVA=L-S4-t#}$3TTkjQi_x7sV*vw&O=lgIgd;P^Fp2BSW zaz69zYF89e zrc4p(aZ@%lG`w{IGzP!rZ~oQ2)#Z;KKi1UJdi3Y#=bYQy*50VjewulAmuc1Bd6Apb zGWX_sY35Gx);zka^mRc|(XQX@iaudG-|vvMDp_&ne6Rh_CzGq?Kuv+b$gFE?K#{7y z|Buo2_3_6K9ct2z-e!<>MI$IU_~ng_%3hjMrddaBZOvx<@a0R&pC2FRSe0_g%geLz 
zNHE-q*m(Tx+1@@`>rFL3i{45xv+*QcT@|_||GwN@zvJxkH4c}T`C8Wfsc3!7!^5M} z_h!lz*jfM^n>kaah&*uHBqk(uXsUL2fY-%+wbf$!aW(w&dkjRaLa(J{|Bv_Z;Q0Lf z{PMcLzZhb6m2`SeR{MBVJU+y;q5SaS!_9lEzlW7e@bd;os=3@z02kV;{GFVbKx3Qt z>;M0il~>PAi(0k$>^qfw&GHprE-ZA8y2J(=Y;R;{e`jX}F8K<>Px;>Iaa+92PvgMR zZt?uzP0IzGI6f};@a@~a$M?(>wSsrP-{CvUq*E_;*M~o!&o7>m^!3$MpSf0B5e*gd zLTSS!mUr*o9XWoS*<4L5=k~T-cuQ#Y1*Ok^etr>=k(tg*_W%E9-EaS|VksMS`q=rFUQcwFo$MRG-QS>G_)`pa1uMBH!}Z zXHI^0?U(h8Q_m(ItJ}=K_~W+CIh-H2Pno^r=FOXd>*MxX+kNEL-*eztueA4+{l8wV zzBNh7eEJzLFE1r4t67WP`!^LncKi3Szdj%e9LP=D_iA!)N51x?M)soLR_rc5b0 zz7br`o!Zs>GRMBOG5e?M(NpWiMLrgPV%=vwCC+zZXS?H0=cQ|-wjR2Ddv^M~%4IjC zve)nZb}P64^~J^RE4!ahQ!_K0wqSum*40(6r>E&|+`c{i`@6dgph37*q1p^hO-&uW zy;J*@TLhd!W-q-R?4^=x4jR1pJU@8Glc=MPo$YQrZS(r=|5@zYx9`dG=ac8oUHkXW z+E#AyN6((QWn`={vyX|2k}4}JyJfd++qNZ3mrjk;0~t2G=k)deVS5GY-FMpNz2Ebh zkKy{d*u`aUZ`Hp}dVH)m`PP=q7bRB9d}o*WPe0ey)1zW!B-F4u{rsc}6AJv#X`h-t zd9pIBy5Ic!^bNDh7=4Ys$9WGdYEx2I_rG=K$c$6-Hx~(SkFWnbb)vHSq{)*Hzu*7= z+`(q{pI`O~Jzx{r3aq-LR>*ZmC`>U<1zv-BnnK7hJ`wt%sG2Nfi zyJMAd_NSR*nfjNNbIa;aT1_}M#kAeq_nuDHi%y9rUUS!^WEu-6Yt^4SC95^pqEKnJ zS?;5a$K{w8)c*dKb9>v{b$Zj_J+WKj%m4lR=i=@@dHQsBFE6f>Cr?&XRz7_B(zB?@ zNK;eu$@AyJva-GV_y3QL1&vVc5dU-AeDQ8)XPvAU-BY8lT)2F9q2l>nrmL5oIC6Nw zog=&&-{wg+#I4s)KL4MSk8hdxbiFwig-p@W(bM!|Pf6$RX)g%Mt^5w=A9}*t8rN!N=e0OY5<>wt`Z=+hj?=O2R zb>8MPkDs64mh9_&Cnu{<-}(O5*6hmfcgx?q&Jdk6V@5>Isp+M!uic%j8M|li-kZD2 z^%?wazlLnwTGA@8Xw%G@FC&c3%HhGJJrdo$& zrKca)-~Xq{Z~5W{3mSfYe*XB)o0xZ<;p<`|7v(%TF){U>ed((!LDwDL-rm06Ece9R zsxKGaEsLITK#T3Ov(2YlUWpPsJIgfr>#M8fwKcA1W}EjX9&WpP@plVohT6`~?#c7# zmsf}Do8{a%aM|BJHfcY<-H!&4vriY~Ya1F)+*|!U>+SRR@9WQ3`?T}RpWFZcZ+@hG zI7i^pGiOS^nBTZ?zuGfPi|H!q>helUOA84Lzkl_2+pA04^W*D& zJY;|1W+W*iv*v2-o{EXj=T-B8#*NG0#pEPCY-Z=bwA5Q%Mn>kxo6YALJbZn1%@%&} z4PPIpYGNXyudi=a^1>k}M~C6`G~MI}2O7V;yv)wd&VH}rG4Ic+tUcRQLrYko2@v&Z;|9?K;`gT@s|E;ar*}t!?iJaUgYhCp1 z%}pW0c{Y`kqPAvT+>q$Z5EK;j;?h#?b?esc`TOlQs8xBRGGEHR&gS$qUFHXO%kP7R zZN$aJ4;?xb@amY#WMOr`Cuhy?A31%x+bs9ihtKEjC(oaM{>#hDot>Qr6AT1|g%58^ 
zJq_w8G?;O2KA3Re(4nUJb-z}gd0z4NS7}pIQ$bl-+JOefnEn6u>u>w>_pkTg`7DYp z0*@R+@*b5vI>I?kKmOd#;^#j;pSS<{Y<9jy*_(*aBW3<9tgM2vvc7qFdhL9&i-MQ? zMcuW2zsI?qPu8O3g+TGMGl@q!1f%XQbZ)<~t5kdczhBz#_x;v;`7+gi#gBW{?>|17 z?7yk{d)}kRk3nc`#yRb3YJ@@z2@VLY~I|^m&Y9z9=Uvt@>{`mO#=JfM&YsEkRe!u_vD=u#Cn|rIx zKR-L0{OrukD{CSrm*20Qe&B$EkgzZ(506gSy4jNr&d)OC=I7_1SMjLRXg+8e%gWf@ zX2<(vL0aD2-tKSRKXs~TWo2bbT3Xl(IZ&rEI4tkcw@;__!&khid;08|TUM5qcKA9S z!za6Hf0woK%kv$W>~H6pmZlcIE{1VI{r`Uv5fK+QBs$mB)NJ8w^cEHucXx4NVQXg0 z&(AL^F8=xV`+de2S67RFeRcKmM0dG`xwp5?v8$am*SdVtlqoLP*Tou2@VL0T7XJG3 z^7g&ql`~AMf8#YXkulv#X@9*#IudS`EEbQ#YiHBHn4?T)bO-Z@1B2YQE z`gosg)ceP0(*x%H_)%e1`l@C1YVGLlc~4(lTpV@xKqE6_L3#Q54@XX)JbALEy?t}m zRV~nJsAt7@w&l+Lwrur#N_u#x71WF8=jYea)!n*l!v+Hx8JQUtg-I)qM@2*w z{QUH^=>5q%cVre{T#@_y*|V(we~#P#J9uwz^~TMclaKXCM#(O?`}}ORj=!HDA0s1U z&9|HBQT_3e=QrBy+_^I%BI3?PL(r&`rq<@;y7u<-=gg7e;^yvNJ0oCw-d(SH$NzEM zyu456mft&gb?Xl)mAkvk(|>+?dZ4#_x0G3q$K73}jnbzJo<4gvY358%Cnu(P^X8?_ zyMO+CKP$Ib#`_vsi-HH+K7!Uw$Xb^byt<f4)>|Nj2| z`DXL^F5b!e{(ie1v#(}m<>zNpD}B?`)8AGx#O^LrwX%|OsAgk0JKJ1cQ?ql=9-B>@ zHvRZ`T)y$YY5}jbnTopla;N`%cizQ0IXhQ=JSzV3-rn6|Vb_Y^TwKhq9kzzy!HJ2= zNAJ0YczrxBUw>hdYj@FOLC{cj>`dD{+v;yi($CM^^WhM;N&dY(TFJYf=j7(jG)Q#n z{3L0dCLkr%HG8)7*6izQT3TMKi;Ig7A3xsCEv^UZanCZ%4tpl>05ocAQK*!cmnYHd z7PT!W^3C7%`?IdB5Ij9iSJ}d1#-2Sk3~A@*`OY@W<>2OC>_6Y`*6zIRmtKGU^7?vz z>gj2j8|T?pPy2qanqOUA{jKb_kB^TVXI)v5bo|?ki_M;s)fP^1T7KEE{$EWSkK~~{ zJBzz{ciy^rQ&2zvG+w3~y$v)1!vGq@ySUi>@uNphmzVkG&pvVQ$>Yb1d29V<8o6rQ z*ZwkD9lrkAlarHgosyhi|F3f9`NBIp3iteY)NNsH{rJtBp5425m#wS){q5q;;`B3T zPycm@;!tccsI)5GT)mj*`T6JvN*gd?Q!-+%DWvb@) zqYI==vltR`bIMPp3lDjRf(A(+9V`g%jJ7oH=D|@}5goMZQb8}Z)i-7t)Aq#hV-O1bj=dv$S zN8t@*UDu-r4;t352Q6J-%2R0)R`WS<@1C5wxjAS+Q&t$hKI zI2RWeh7ItsNi`#*NrjJ(F`5hQooSrDDf_x!+53BE&&)L5n({tz%G|kUZ){8kwRTqq zFZU~L2Q}h5w8GLJO`ALS>{RV=uRkg2>ERiIALVSTE-Y|tPD{Z|TV?8B{;aSqGB7Z3>fOt0A~$!bW*0v@v+_zd zzl_BMMrO8v<$khax=}~6*Y7UAuet?%#WNwf;V|CG+x? 
z;`6r4d)$&s+k;m9`0?WvpW_`yh!<`y?zdYtWmjRoZS}X1W;WiSzP`S!6B86~)xR!# ze{b!sl9!9_+_?j4!S5=1+NGtV6S98)zg?d`dycdC)zx)ncK*JVXU?1n`Tgzf)d`BuU9q<_Ojccwua5D_*>KQIx#v<&BNW zr!HR(zU}Af8K|eHcdM+XW{;6nZ`PX|8?}^_79IIJ-=ff|vbs9^_W!@{>qDQPn>)3; zyL*;ZY1YQ)pU)Vd&-lIf%O&qur}g)TcxjpzKl6F|@ZrK+TeFwD^~=^`5??KV8fO32jE4PQ2*Q4$C>yppTGIeorNk~j|OiEHZJzZbF?5=6{wFxt3te9(W zV>73xr^hIH*VM_Ag@6A1d24HOQW6Ib&y`ot&&~CQ3_Mxr@BdTOS$%I~GP|sG*_Cae z%HUY9^zny>+qG@npa1yy_~zzxe)m3^hrhnQ1}#B}icL&Z++F@Y?cScsGjl8_&oa&a z^ltZiAqfeOq@<*c5f6_UWF;jjZB9FT=N|LjJ*9k3^eDFb;aX0DBuqsJjlQ!Z@0(kysn+yJiobC z4`01Hb!~05W!0AzVde!792oB0z3bh(*94`h>lhOwBPb{+!Nay^@7|MV&p!Qrzdrr; zw%l6*D`q3LeYwusDZcYty7%$6qNk^VPS^kWcw8veYHpuvx7gHO^%)r&3l}c@ZekV; zYP;y@>PlJ`rIdoo=BdlV)<)fWU$yt&He+#(r4btv96K*gZ)s@}S}atbK4p=TAuAi( zr$3+18y7rqD6x{|T8v)qoZTZhlA{(R?E>>IyyQ#YJL{&{5Wg&>|_(Et#9w# ziFrH!|BvJLH+Glnmz9;BnPKR>dbjDnPxGGpT3T9yR&3u)pPy-4=cT#t@3&|fS=rOJ z|H9yn;6;yCm8=K}xdWOH`~80Z`SkguYNdpFFwp$i{Q$&fThmwRsoZn2-nB zb>QWtq_5wgj1b5(W$n8#Wl+xpPOtsw87$Hb}tS-2B$|{PUCj?M_0+q~B|t zJ8-~Z-_K{#OiWA-(*OVRadIk}n23ajhZ`9g&9JMj>U<2E;%Q`N-?Aj*-=Cj4QCm2| z!>=b@j=KqO_cAgvmb|#Y*zoJ?YvZaf88I<2A3lG++%_fu`0?ZIOO~kcusLshc>D5l z|IJljv-bUX#LdIT%+}nvxB9zL?ky88F0O*&;>EGM%Y0^=@ir7bJ~nakv}Ky|9^@>wFeg$*DC8NT$KX*E2<}W zR?n2n-2Cb9*Vm_Cg-tY@ZIIY>YfokI%2M zc$rV)%S%f?)$2HMd}OG8y>>fjtgE!W(0}KS9T!%IuYbk&xKVB9!CDv1qo9>6GYpfx zYL+Uu2rSCEckfdIeMwaxq9 zcy_kAu)O^Fhlkt2t=5VK61`oe?3W{V*|zqazP@o?`oR-I=zeSX`E3azsTb_JriRZ0v?ecXedL)gz-X5T-?gttTIGAwY=FLd! 
zw40k!XUv=_8m)fe=+V|ISFYSXwz2B#E3cVfj1?6d>;C?l8g+epe*C6Qn;2FEFF$9y zf1AM4lRIku=3nJ5w4Zcj?W{fj|NY*Qb=50+Mug3etgEYz&N9v3QSi`d<=5kVvWr)& z=$K#sFEZLhT0upn<-`dO(30JvCmq|=lp5Bq)qTJB`#ssCfsviJ->!+@KkwJq*WoWs zTQ{sr|G52C>kfI(M`65uGM0xHxpx2fzW;x(pSu1rJDoFU&vJ5eKYrYAueY`C>8Yta z@^*9X?X4D;mR>#e|Ig?1Bch|37hDHT9Ig$!zCPZ*PtF#!s@H3dzMfv6UhFQ=2-3Xz zf0ac=ML(WQ_CImv%#q{A+2!Tu3mpgTqtFaqmS7<9;lqc7|Ns8}`1$kVuF~vjI+09w zA|5KkS3q~`hSfejB{~mz&2*r3ekSK@&|U%P((1Y8_g0=r7Fl%2tt`9KmA7_fshH`P zo>%uabtwxydcS(wk+rVfVmo&2%Br_K(94-oTU%RLP;lV-_37Tz^%%|nO`2hl$n@>o zw;eloZmj?R?-j?-d74^UNmo_`o|$JW9UXo96`y;^#_4fYolj3sFaG)IsZgn*p&@7} zPe$gZsGGGqe0|#Ub91ATwx*qx0>x*=2ZiWuIf~%s5)YOJ zR;pe1jvW@0CQk+p$#r&izPY`w&_9V>fqehJoqh4{!Uh-QwScpfgde>NjqZ=G zsj&g6zq?z!p`(MN>gy}lq9UWcRbQ2ql$aV2dod-`_1Qe}8WcsNW!d z!nutn@%g#AOO`CjNVP71r=qFZxh{5hl7U1=SJ$I2FE5LBeSCJSbKSaiC9keFTVad^U@Md+v;yA=jYinR8&@8TotPQrONi;!Gk&X_w7CN z`pD6vg5u)t?R>H?ZfsOO-Y>6iX}QzMkSkCSvh-0sX@d4Tv#%Ku%hlGIeeIBVq7~f8 zF}d`9O{kQ#w6~KJQ^Di8a{2lB&(6*D&dJeHcI$Ciei<~Vv1!w$6DLkwXzTOw^z!1` zoPPe<_4xXuXMEgN%7+Q7`z=X6-Un(}MBO|!Rok-ekA-g37KS_?%evohw?{SI-&L9o zYP!C-xH$953c;B(XWrWQ?AbG)Stgx3i=StudTG|w)^>lr6ME&W>h&{c&UESSPfJU4 zOH6#2+yD3Vh5v8kSOdHaQ%{K;>ya!zZpalFn3iVsOFHMt>%fl(R!n>2xcu_P-R1dr z?%oA$F?n!jXK|F#HMHLDMo4crwDv;%cQJg@WH|7i%~YIyP_Uw>Ir{^wF%O3IRHEc0xu!(Q** z9iMY!L*wP;{*T|itGj>nTlv!`PizXlhe&pHb!~ZmWoL1^-One&LBYX4pG@}Ouw~1W zCnqOI6{VU;dHVP~xt+g1_2MGe9$D*YQCqVXUVmNrxYzv0&*$?&k(rs9dCPKs-LFhB zy_gf{=2~ao+M)@H^!WXCA|fIT0#Z_5*Vn~<{Q7li`uTaFEzYWTQ%;;cn|i!Y_R8w; z^+$f@UyF>4OiWE}-MiP;YHr_z2?Blc_I^o8O1ZbTC|X#|`0}NMhmAQbEKEg3Ma263 z+2;Y9(|q3^W?|sw=Dt_;S~n+WN&1uZPtMN(FWg*qDb1?%bN$oRCwnA~R|GHTn_v4) zl9Q7&B075V#EFgDx0|c`%}F>j!?2>dT3AMgr(yBp#oOW|ZES3|ls|vRUjOGKJ81sx z=+Q(2iB;Bfs`V5V8T0eszv6QanXm=EHgvhw?Kgg(rfJD*?XCK{%FL(u>#NX5j~*2i z7N&pND9XSfbP{wfz`^_X=dW0y!LVr2q8In~+ZQ}`eYt+$FRrB`*5&V(#Oy6vDgLjwZ?)&rU^eVoe`H`E`c-#NSSAUlixmsyg@Mm{XV1S~bfR;j_a<6C^ ztIH9;3kuT&J~(xl7!(#cdf!SjdCMX>`-(&F_K4`2^F2#?e0|usO=ir`;O$yumLwfm zxU=P!1d~=mvPclVhv_jomQA;rKl9JV=Wq5sPn`ee$C>JLzwMSTU9x=n 
zY{&>)#7@Qeg|lrcjgB5YDk3ia`RDWbpKoQaXZ&!0nIE*V@@TiXWz`pr$H#h^Z_O{S z{{43Q$`vaXY}s;U$NSl)J1Re`X@{*zxV|p7N6Pfoy5oT>Lv&(yUHNeC&ZNTdnu|Emzjk>RK7R{L$mbhhJS?&2Fm( zT3vnXqgk_V{eC5DS(M_yU$A@k?#XlK+Af%} z&%2)6ykW@_6}^}p2Y!BjZdv+DWarME-!(uhD-KzCJ}m18O`=bqe*9RkG<)iv6DK_Q z#GH9^bMwb<-=5tqzt6g0)hf^ogx>WX zrLV;}IXPKaSs%WCpP#dPOZE4>q@<*dj*bJjZ_j@6B;|4Xr%#_i`}=oo30<{z?b~f@ zFD-O#@96C{-S=+B$KsL_lYQ6k-J93Yz_4odYW81SLFwmkmq^}s&@l~$h7;f3-v0Q> z6BCW(V@HopoH^68xY$@#RrSm))6*4ts;aE@_4Qs}UN<%*GM~5mz2;+#rzhv}KH10e z|9`N9_V91pw(WQ}sC88N`B`8@#EFlOk4J9Jn!54k&al<0+S=WhE(Jk$B6n?Eduf_( z^sX(TMdju1x2?VO=jUh3+Fx4=@AR4Z&%Jv6dhvq;jE4>#V*9hVR(|VN%{SKZhY#hl zvaGW{bSRhgjdi?p^z2CS3EYY;5-ehuEfXy&|9-vB$;YRrrImHz*p&>EkkC-js>_s= zB`0?_H8FjAcbEIXp32WFRxKKn)?AwlAruzE*e}8|M zp5L9Fm-p<`)6)zst*sB=yx{?jE`SDlzJC2WW%~5ZNk_Sk^+-0ytp{xy-}_H?+qP{k zE-nQR4lqvBi9B?$nf>OT%FQ45986HKvzs?nJKU)9)05plmhHQL>y}8}yt=+7{q>74 zZ)Ym_8*RJ)=Z-1mv=y+Upt*E&8@#y@$M|TuH z=HTH;d2?gqvEt6o&Wh^l&#UAAt~&p?^Z%{*nCze}r5Rb7PoErpEu*pW=uMxEoQf?H z7CS?&I=)odo}Q-rxWE37^15~Fn(gYGoR~B{;5YSFb>WHpc1a-W2{hE?++<_x3i)+uC#O>-Sx+u)3vHx<%pjov_R|H#XXC z3!Pwn+Af9b*nhp4Ei_18she+N6JPG7XvIoMl!w_jLUwI7FK zOX-9|xeZa#(I-!zW`1xMJl}je<>7&ZPhY;IJUulvwr}~eWkv7qSei)jt`1*smY2fw zc6)@gvoj|jU)tGOraN};{%-MamT7iIW~Srfiw_<@K0M1byQ=s2)vHtQ?k@j)OgewT z#f%xY)n?*)F$}lT&nYP>?b7jU_$Ij?Cd*TOLpy=_4W1j;J7$H2M30_y1F;F zwyyTS-~D6#YF=MvEBzLMLz`yqeR_4T!4%!~US3`m&t|5dICaYCoQm%ZgG7&YxtdG1 zc0Y~Sv9{sTtTwsxt8M30y5>%(iIn`=8!3Ns=GTeS!tV=*uZuZ&_;B;n)6+MXy^Sid zl3ly@?)_DpN;XMd{H0r6BE(l_c6VO!hRunUCzdGod2lEmNtkpfw_(-l)tNatOSW#E z+SJ7K?EL)u&s&+NElDi#kR{eOmdT6J#Xrys~wTj>7km zz7NY@Tu_v&d?L8aXXc@0zO&B=9WFrV+1zz#?bfYRL0ykKcVbSRK7I1Ui3NN1)SP%M zac*)E^4wy?19$&)8Ml`@Ab!&W7mK3cZ?$_m># zg_6nrw+_$r^$(pne+4WuZ!84 zR1+H;+uPp$a!o)|l2T`9r?c#=gpBILU5%4#AW3abqI6cd;$md061?gy<{RToEo?qnfVe8YoiC4w_u!P~0~=)aEf{iQkd};lCf&*Z<|V zx3_;>@8{>YWa-k4Sy#1|`OWQeTm0~(`urvP_T9U)uWL0 zP1EYNE@;>;`PG$`KYsj}FlSEAGY{AHc}2^vW^wZI9Xo%%|G)tUE^h9#va4RCyuP;f z$B!QmUcdIPsj)eG_H0O4m{ImM9Z(v%>~G&2x8A&bMy#gO9oFW93wG?>sTmp?dg|ff 
z_NmjRiMa-fs9IV^diwd96_+fso?E<3qpRrWr&LuNo0yNEJ_UhB%8cFBcU`tjKi(%B zeCW`jDbJpzxdw_neZ77^RLi%?^Z&Sjn#Cfnfm52<`L&9lpEJ!bIJ9EduBe$7g^Lzl zehHGfaUSa92=Ya?1f9Y)pMQ)*Z2Q3oo$})wscZi+O$>0=eFhFKlkeDYDvo?6$XC0 z9}8wK7QXv(j`374*W_en23}s?9lN8;%jZ5kmSAw<%9WOb2@cCI8`k})=##ZRHc8c+ zq2k@n=QpSfquVi`gIV(BUnFfhWo}QjLcXnJnbFq_8 zPEJlmRrTT1r$-^GLX&`AqDbS8{Hq-}2Y~i&DGtW zclXhSh0f3TR{MjJ>a^Ljy&W7F^78VICGReO|7?BzU+wB|Zze|Pi>)X=r=_K}V$GVA zUteDO%(0mG>QxpK6BB6gC-SIOM?VOM2>U_DyIZ?w(c}em#_K2 zXlQ82aOBvrQ;%4;oCQ}@Qg=% zadB~=^(Ze&tVCQD?d|7hUS1|BCFNCCW_BU2LO*WLjWZL=RHZ+3)$J^OEe2Bj^=i0< zwKcOr>M4=D`}@u^^V=|N(GR$>F}dCHu*lq!>6_1~`OP_Tb94GqjtvTTpZcu7K6%1~ zhOpH~@78Yr# z%^e*b{pauBn_IKRO-)UY96eh2_!#e>zkfab{mu0_Hi-m5&MJ%G+zH)Ssk)><7*uI& z5Q0l-f;W9Du4xg09Muu##AUA7aul_*aH@%w` zx2SXZx3{;`-^5IuHLJ^O>7`%UKgdwV-C4J!QhCNjAvYpayKz5SIeQ%g%rhu>m~ z#m~+d=1FL^Nc;o2C1TFS#qPmjVQ#^}!VKR;#p3=CA3qxYJoEAK@rzkoQ_|Bn7d`c20PW)D=2q6$&OTtssko*Y zD7}9SvGQ`0?vk*R^4rOJ9dEfCe2mZQg8I{Y|H?uI`xR zhCZ`=QBhGB7Z-!%V?0hyPM~qq=^OX|c*O12CzHA9VPQ$hmO`<$ixx4Zr$3i7cgf4i zdGhb~d-I=f=Gj(P{WoeAINSzttGKv(TAJF~vuCGFnPQNAP3Q5>DN{uL{Qaw`shN1V zjo0S)o6QV=-j(lve9>M0>F4wI&rhn)e*$Wc|M@)szlyeYckpt*g;%pSZru2C>y3+9 zTd!QX5)c;VmYJ!kzvn~K$)5)fIBbd1jaoacql078q)7}OK0Yb8x8=^UF7MNg-ZtaR zs#1&6S0apzjB3Z{%$WlkxJ*q=O-W1p^yRXD`Olb)42^yJ_JOu>mA<|v+5h|3ucBvX zX1@Pr)hckfW$jL80jG|4875MzR;@a5;)H^c(WNIbj*g7g-`}w&+}l%mm>$#Oc$IZ|CoS`o8|Z^~Q}GkA2>rfB)U5nXALsKYI1**3%02g;hU3 zB!2q*x$^(t@4n~c?dw2?U1(`(ad2}#-gsONwDbYA#63U%eEq-A^2hpQZ$G}T(jp-T zPKRq;yTw2g(w)NUD^{=G{kl-Z_2JIv^FT{QYpeeM{eIuSu+XrbUq0=4pX{Hf`t=X@ ze!pj2=^q&>xpL)7(1xEIH*Q?GcI{a0_q*+3tKXL9u(PvENlERJiQkaGxH@dDl7WGM zy83cC`~M#f^FMy}%&oH0($Ue;p}Nr`>&gndzqw8vuhq7DwF-!7D=IclnIdu}YwO96 z6D9~eUVBbh-4C=$X!2y?;%8?bp0EFB%&>Fk&O3W5g+cRJn>KIWS@u@y!oQw7OTDL8 z?Roe0_4Ul`?8hsY&r7({H>+0Em(|8v2j`q`5wpt+PEFBbPVd`sH6 zW5{rRAozu{Z@{YQ@7EaVxVLQG%2+aU<>`|rA6{DO{pU@( zy|LWotgWD#|M)+T#MeY`@0&JF?9rn~pxt99)1RN2d02b>o~A__$6R@3A3b_h^+(Ui z*?Hr}jSH79ZS|dP_VC#=wk`Ladb_$_eJpnBc<8#cB(6-gMc~oA>Yq=iNA4(ixFAPY 
zNLcvrGT+%h9`e^)a7^S>)R}unOVp`D@!PA_>la;q$!60Rw_e@KDyqv&ai4q2G4P2a xhrCYoKz683op1;$;_(pNkW0i Date: Mon, 8 Oct 2018 14:26:43 -0700 Subject: [PATCH 082/411] Add custom call with layout constraints. Add a variant of CustomCall which specifies arbitrary layout constraints on the operands and result. The existing non-layout-constrained CustomCall is changed to have no layout preference and can now be assigned arbitrary layouts by layout assignment. PiperOrigin-RevId: 216249615 --- .../compiler/tf2xla/kernels/index_ops_cpu.cc | 22 +- tensorflow/compiler/xla/client/xla_builder.cc | 43 +++- tensorflow/compiler/xla/client/xla_builder.h | 22 +- tensorflow/compiler/xla/layout_util.cc | 6 + tensorflow/compiler/xla/layout_util.h | 4 + .../xla/service/gpu/gpu_layout_assignment.cc | 10 - .../xla/service/gpu/gpu_layout_assignment.h | 2 - tensorflow/compiler/xla/service/hlo.proto | 9 +- .../compiler/xla/service/hlo_instruction.cc | 28 ++- .../compiler/xla/service/hlo_instruction.h | 10 + .../compiler/xla/service/hlo_instructions.cc | 33 ++- .../compiler/xla/service/hlo_instructions.h | 32 ++- tensorflow/compiler/xla/service/hlo_parser.cc | 101 ++++++++-- .../compiler/xla/service/hlo_parser_test.cc | 67 ++++++ .../compiler/xla/service/hlo_verifier.cc | 22 +- .../compiler/xla/service/layout_assignment.cc | 108 +++++----- .../compiler/xla/service/layout_assignment.h | 13 -- .../xla/service/layout_assignment_test.cc | 190 ++++++++++++++++++ tensorflow/compiler/xla/shape_util.cc | 2 +- .../compiler/xla/tests/custom_call_test.cc | 50 ++++- 20 files changed, 650 insertions(+), 124 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc index 3d81ae9eb8..f210bfbd88 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc @@ -88,20 +88,30 @@ class ArgMaxCustomCallOp : public XlaOpKernel { xla::ConstantLiteral(&b, xla::LiteralUtil::CreateR0(dim))); } - 
xla::Shape xla_shape = - xla::ShapeUtil::MakeShape(xla::S64, output_shape.dim_sizes()); + // The argmax function expects row-major layout. + xla::Shape xla_shape = xla::ShapeUtil::MakeShapeWithDescendingLayout( + xla::S64, output_shape.dim_sizes()); + std::vector arg_shapes; + for (const xla::XlaOp& arg : args) { + auto shape_status = b.GetShape(arg); + OP_REQUIRES_OK(ctx, shape_status.status()); + xla::Shape arg_shape = shape_status.ConsumeValueOrDie(); + *arg_shape.mutable_layout() = xla::LayoutUtil::MakeDescendingLayout( + xla::ShapeUtil::Rank(arg_shape)); + arg_shapes.push_back(std::move(arg_shape)); + } // Tell XLA to call the custom code, defined in // index_ops_kernel_argmax_float_1d.cc. xla::XlaOp output; switch (input_shape.dims()) { case 1: - output = - xla::CustomCall(&b, "argmax_float_1d_xla_impl", args, xla_shape); + output = xla::CustomCallWithLayout(&b, "argmax_float_1d_xla_impl", args, + xla_shape, arg_shapes); break; case 2: - output = - xla::CustomCall(&b, "argmax_float_2d_xla_impl", args, xla_shape); + output = xla::CustomCallWithLayout(&b, "argmax_float_2d_xla_impl", args, + xla_shape, arg_shapes); break; default: OP_REQUIRES(ctx, false, diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 6b31831010..e7cf9ae363 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -1279,9 +1279,10 @@ XlaOp XlaBuilder::AfterAll(absl::Span tokens) { }); } -XlaOp XlaBuilder::CustomCall(const string& call_target_name, - absl::Span operands, - const Shape& shape, const string& opaque) { +XlaOp XlaBuilder::CustomCall( + const string& call_target_name, absl::Span operands, + const Shape& shape, const string& opaque, + absl::optional> operand_shapes_with_layout) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; if (absl::StartsWith(call_target_name, "$")) { @@ -1293,6 +1294,31 @@ XlaOp XlaBuilder::CustomCall(const string& 
call_target_name, *instr.mutable_shape() = shape; instr.set_custom_call_target(call_target_name); instr.set_custom_call_opaque(opaque); + if (operand_shapes_with_layout.has_value()) { + if (!LayoutUtil::HasLayout(shape)) { + return InvalidArgument( + "Result shape must have layout for custom call with constrained " + "layout."); + } + if (operands.size() != operand_shapes_with_layout->size()) { + return InvalidArgument( + "Must specify a shape with layout for each operand for custom call " + "with constrained layout; given %d shapes, expected %d", + operand_shapes_with_layout->size(), operands.size()); + } + instr.set_constrain_layout(true); + int64 operand_num = 0; + for (const Shape& operand_shape : *operand_shapes_with_layout) { + if (!LayoutUtil::HasLayout(operand_shape)) { + return InvalidArgument( + "No layout specified for operand %d for custom call with " + "constrained layout.", + operand_num); + } + *instr.add_operand_shapes_with_layout() = operand_shape; + ++operand_num; + } + } return AddInstruction(std::move(instr), HloOpcode::kCustomCall, operands); }); } @@ -2690,7 +2716,16 @@ XlaOp Call(XlaBuilder* builder, const XlaComputation& computation, XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, absl::Span operands, const Shape& shape, const string& opaque) { - return builder->CustomCall(call_target_name, operands, shape, opaque); + return builder->CustomCall(call_target_name, operands, shape, opaque, + /*operand_shapes_with_layout=*/absl::nullopt); +} + +XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name, + absl::Span operands, const Shape& shape, + absl::Span operand_shapes_with_layout, + const string& opaque) { + return builder->CustomCall(call_target_name, operands, shape, opaque, + operand_shapes_with_layout); } XlaOp Complex(const XlaOp& real, const XlaOp& imag, diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 2e14e47a35..9ceede7a79 100644 
--- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -577,9 +577,10 @@ class XlaBuilder { absl::Span operands); // Enqueues a custom call instruction onto the computation. - XlaOp CustomCall(const string& call_target_name, - absl::Span operands, const Shape& shape, - const string& opaque); + XlaOp CustomCall( + const string& call_target_name, absl::Span operands, + const Shape& shape_with_layout, const string& opaque, + absl::optional> operand_shapes_with_layout); // The following methods enqueue element-wise binary arithmetic operations // onto the computation. The shapes of the operands have to match unless one @@ -1197,6 +1198,10 @@ class XlaBuilder { friend XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, absl::Span operands, const Shape& shape, const string& opaque); + friend XlaOp CustomCallWithLayout( + XlaBuilder* builder, const string& call_target_name, + absl::Span operands, const Shape& shape_with_layout, + absl::Span operand_shapes_with_layout, const string& opaque); friend XlaOp Complex(const XlaOp& real, const XlaOp& imag, absl::Span broadcast_dimensions); friend XlaOp Conj(const XlaOp& operand); @@ -1732,6 +1737,17 @@ XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, absl::Span operands, const Shape& shape, const string& opaque = ""); +// Overload which constructs a custom call with fixed layouts. The operands will +// have the layouts specified by |operand_shapes_with_layout| when provided to +// external code, and the external code is expected to produce a result with the +// layout specified by |shape_with_layout|. All shapes in |shape_with_layout| +// and |operand_shapes_with_layout| must have layouts. 
+XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name, + absl::Span operands, + const Shape& shape_with_layout, + absl::Span operand_shapes_with_layout, + const string& opaque = ""); + // The following methods enqueue element-wise binary arithmetic operations // onto the computation. The shapes of the operands have to match unless one // of the operands is a scalar, or an explicit broadcast dimension is given diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index d310335618..3c8db9aa45 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -65,6 +65,12 @@ void SetDefaultLayoutToContainer( return layout; } +/* static */ Layout LayoutUtil::MakeDescendingLayout(int64 rank) { + std::vector layout(rank); + std::iota(layout.rbegin(), layout.rend(), static_cast(0)); + return MakeLayout(layout); +} + /* static */ Layout LayoutUtil::MakeLayoutFromMajorToMinor( absl::Span major_to_minor) { Layout layout; diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h index b78883c2d8..af032b1cae 100644 --- a/tensorflow/compiler/xla/layout_util.h +++ b/tensorflow/compiler/xla/layout_util.h @@ -40,6 +40,10 @@ class LayoutUtil { static Layout MakeLayoutFromMajorToMinor( absl::Span major_to_minor); + // Returns a layout with descending (i.e. {n, n-1, ..., 0}) minor-to-major + // dimensions. + static Layout MakeDescendingLayout(int64 rank); + // Creates a sparse layout with the given maximum number of elements. (This is // a convenience function for protobuf construction.)
static Layout MakeSparseLayout(int64 max_sparse_elements); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc index 1ffe855750..8c9a8adc61 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc @@ -213,16 +213,6 @@ Status GpuLayoutAssignment::AddBackendConstraints( return Status::OK(); } -bool GpuLayoutAssignment::CustomCallRequiresMajorFirstLayout( - const HloInstruction* instruction) { - // - Inputs to cudnn batchnorm custom calls don't need the major-first layout - // (i.e. {n, n-1, ...0}) -- we can handle any layout. - // - Inputs to cudnn convolution require custom layouts handled in - // AddBackendConstraints. - return !IsCustomCallToDnnBatchNorm(*instruction) && - !IsCustomCallToDnnConvolution(*instruction); -} - Status GpuLayoutAssignment::PropagateOperandConstraint( const OperandLayoutConstraint& layout_constraint, LayoutConstraints* constraints) { diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h index 4ba7989e9c..6a48e55fd2 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h @@ -46,8 +46,6 @@ class GpuLayoutAssignment : public LayoutAssignment { Status PropagateBufferConstraint( const BufferLayoutConstraint& buffer_constraint, LayoutConstraints* constraints) override; - bool CustomCallRequiresMajorFirstLayout( - const HloInstruction* instruction) override; private: Status AddBackendConstraintsToDnnConvCustomCall( diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 1ea26ddd5b..a0eb9e6ddc 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; option 
cc_enable_arenas = true; // Serialization of HloInstruction. -// Next ID: 56 +// Next ID: 58 message HloInstructionProto { reserved 10; reserved "parameter_name"; @@ -184,6 +184,13 @@ message HloInstructionProto { // Sharding for kDomain instructions. xla.OpSharding domain_entry_sharding = 54; xla.OpSharding domain_exit_sharding = 55; + + // For custom call this indicates that the layouts are constrained. If + // constrain_layout is true then the 'shape' field must contain a layout, and + // 'operand_shapes_with_layout' must contain a shape with layout for each + // operand. + bool constrain_layout = 56; + repeated Shape operand_shapes_with_layout = 57; } // Serialization of HloComputation. diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 2f6db7cd7c..5c3908a9a4 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -396,9 +396,22 @@ StatusOr> HloInstruction::CreateFromProto( operands(1), operands(2), computations(1)); break; case HloOpcode::kCustomCall: - instruction = CreateCustomCall(proto.shape(), all_operands(), - proto.custom_call_target(), - proto.custom_call_opaque()); + if (proto.constrain_layout()) { + // A proto RepeatedPtrField cannot be converted to a Span (it is a + // vector of pointers essentially) so create a vector of shapes to pass + // in. 
+ std::vector operand_shapes; + for (const Shape& shape : proto.operand_shapes_with_layout()) { + operand_shapes.push_back(shape); + } + instruction = CreateCustomCall( + proto.shape(), all_operands(), proto.custom_call_target(), + operand_shapes, proto.custom_call_opaque()); + } else { + instruction = CreateCustomCall(proto.shape(), all_operands(), + proto.custom_call_target(), + proto.custom_call_opaque()); + } if (proto.has_window()) { static_cast(instruction.get()) ->set_window(proto.window()); @@ -1142,6 +1155,15 @@ bool HloInstruction::HasSideEffect() const { shape, operands, custom_call_target, opaque); } +/* static */ std::unique_ptr HloInstruction::CreateCustomCall( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, + absl::Span operand_shapes_with_layout, + absl::string_view opaque) { + return absl::make_unique( + shape, operands, custom_call_target, opaque, operand_shapes_with_layout); +} + /* static */ std::unique_ptr HloInstruction::CreateTuple( absl::Span elements) { std::vector element_shapes; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 374862c4b6..44f776ebac 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -734,6 +734,16 @@ class HloInstruction { const Shape& shape, absl::Span operands, absl::string_view custom_call_target, absl::string_view opaque = ""); + // Overload which constrains the layouts of the operand and result. 'shape' + // and 'operand_shapes_with_layout' must have layouts. + // 'operand_shapes_with_layout' must have a compatible element for each + // operand. + static std::unique_ptr CreateCustomCall( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, + absl::Span operand_shapes_with_layout, + absl::string_view opaque = ""); + // Creates a tuple instruction with the given elements. 
This is a convenience // wrapper around CreateVariadic. static std::unique_ptr CreateTuple( diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 152d8eacdb..2ec233eaec 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -1825,7 +1825,24 @@ HloCustomCallInstruction::HloCustomCallInstruction( : HloInstruction(HloOpcode::kCustomCall, shape), custom_call_target_(custom_call_target.begin(), custom_call_target.end()), opaque_(opaque.begin(), opaque.end()), - feature_group_count_(1) { + feature_group_count_(1), + layout_constrained_(false) { + for (auto operand : operands) { + AppendOperand(operand); + } +} + +HloCustomCallInstruction::HloCustomCallInstruction( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, absl::string_view opaque, + absl::Span operand_shapes_with_layout) + : HloInstruction(HloOpcode::kCustomCall, shape), + custom_call_target_(custom_call_target.begin(), custom_call_target.end()), + opaque_(opaque.begin(), opaque.end()), + feature_group_count_(1), + layout_constrained_(true), + operand_shapes_with_layout_(operand_shapes_with_layout.begin(), + operand_shapes_with_layout.end()) { for (auto operand : operands) { AppendOperand(operand); } @@ -1843,6 +1860,12 @@ HloInstructionProto HloCustomCallInstruction::ToProto() const { proto.set_custom_call_target(custom_call_target_); proto.set_custom_call_opaque(opaque_); proto.set_feature_group_count(feature_group_count_); + if (layout_constrained()) { + proto.set_constrain_layout(true); + for (const Shape& shape : operand_shapes_with_layout_) { + *proto.add_operand_shapes_with_layout() = shape; + } + } return proto; } @@ -1870,6 +1893,14 @@ std::vector HloCustomCallInstruction::ExtraAttributesToStringImpl( if (!opaque_.empty()) { extra.push_back(StrCat("opaque=\"", CEscape(opaque_), "\"")); } + if (layout_constrained()) { + std::vector 
shape_strings; + for (const Shape& shape : operand_shapes_with_layout_) { + shape_strings.push_back(ShapeUtil::HumanStringWithLayout(shape)); + } + extra.push_back(StrCat("operand_layout_constraints={", + StrJoin(shape_strings, ", "), "}")); + } return extra; } diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index e169604072..4c5fc759a3 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -1053,10 +1053,19 @@ class HloSelectAndScatterInstruction : public HloInstruction { class HloCustomCallInstruction : public HloInstruction { public: - explicit HloCustomCallInstruction(const Shape& shape, - absl::Span operands, - absl::string_view custom_call_target, - absl::string_view opaque); + HloCustomCallInstruction(const Shape& shape, + absl::Span operands, + absl::string_view custom_call_target, + absl::string_view opaque); + + // Constructor for a custom call with constrained layout. 'shape' and + // 'operands_with_layout' must all have layouts. + HloCustomCallInstruction(const Shape& shape, + absl::Span operands, + absl::string_view custom_call_target, + absl::string_view opaque, + absl::Span operand_shapes_with_layout); + const Window& window() const override { CHECK(window_ != nullptr); return *window_; @@ -1085,6 +1094,16 @@ class HloCustomCallInstruction : public HloInstruction { // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; + // Returns whether the result and operand layouts are constrained. + bool layout_constrained() const { return layout_constrained_; } + + // Returns the shapes (with layout) of the operands. CHECKs if this custom + // call does not have constrained layouts. 
+ const std::vector& operand_shapes_with_layout() const { + CHECK(layout_constrained()); + return operand_shapes_with_layout_; + } + private: std::vector ExtraAttributesToStringImpl( const HloPrintOptions& options) const override; @@ -1106,6 +1125,11 @@ class HloCustomCallInstruction : public HloInstruction { std::unique_ptr convolution_dimension_numbers_; // The number of feature groups. This is used for grouped convolutions. int64 feature_group_count_; + // Whether the result and operand layouts are constrained. + bool layout_constrained_; + // For layout-constrained custom calls, this vector holds the shape with + // layout for each operand. + std::vector operand_shapes_with_layout_; }; class HloPadInstruction : public HloInstruction { diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index dd62988bcc..96f9ff6654 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -174,6 +174,7 @@ class HloParser { kDistribution, kDomain, kPrecisionList, + kShapeList }; struct AttrConfig { @@ -240,6 +241,7 @@ class HloParser { bool ParseSliceRanges(SliceRanges* result); bool ParsePrecisionList(std::vector* result); + bool ParseShapeList(std::vector* result); bool ParseInt64List(const TokKind start, const TokKind end, const TokKind delim, std::vector* result); @@ -1341,6 +1343,7 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, optional window; optional dnums; optional feature_group_count; + optional> operand_layout_constraints; attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString, &custom_call_target}; attrs["opaque"] = {/*required=*/false, AttrTy::kString, &opaque}; @@ -1349,12 +1352,52 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, AttrTy::kConvolutionDimensionNumbers, &dnums}; attrs["feature_group_count"] = {/*required=*/false, AttrTy::kInt64, &feature_group_count}; + 
attrs["operand_layout_constraints"] = { + /*required=*/false, AttrTy::kShapeList, &operand_layout_constraints}; if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } - instruction = builder->AddInstruction( - HloInstruction::CreateCustomCall(shape, operands, *custom_call_target, - opaque.has_value() ? *opaque : "")); + if (operand_layout_constraints.has_value()) { + if (!LayoutUtil::HasLayout(shape)) { + return Error(lexer_.GetLoc(), + "Layout must be set on layout-constrained custom call"); + } + if (operands.size() != operand_layout_constraints->size()) { + return Error(lexer_.GetLoc(), + StrCat("Expected ", operands.size(), + " operand layout constraints, ", + operand_layout_constraints->size(), " given")); + } + for (int64 i = 0; i < operands.size(); ++i) { + const Shape& operand_shape_with_layout = + (*operand_layout_constraints)[i]; + if (!LayoutUtil::HasLayout(operand_shape_with_layout)) { + return Error(lexer_.GetLoc(), + StrCat("Operand layout constraint shape ", + ShapeUtil::HumanStringWithLayout( + operand_shape_with_layout), + " for operand ", i, " does not have a layout")); + } + if (!ShapeUtil::Compatible(operand_shape_with_layout, + operands[i]->shape())) { + return Error( + lexer_.GetLoc(), + StrCat( + "Operand layout constraint shape ", + ShapeUtil::HumanStringWithLayout(operand_shape_with_layout), + " for operand ", i, + " is not compatible with operand shape ", + ShapeUtil::HumanStringWithLayout(operands[i]->shape()))); + } + } + instruction = builder->AddInstruction(HloInstruction::CreateCustomCall( + shape, operands, *custom_call_target, *operand_layout_constraints, + opaque.has_value() ? *opaque : "")); + } else { + instruction = builder->AddInstruction(HloInstruction::CreateCustomCall( + shape, operands, *custom_call_target, + opaque.has_value() ? 
*opaque : "")); + } if (window.has_value()) { instruction->set_window(*window); } @@ -2533,6 +2576,15 @@ bool HloParser::ParseAttributeHelper( ->emplace(result); return true; } + case AttrTy::kShapeList: { + std::vector result; + if (!ParseShapeList(&result)) { + return false; + } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } } }(); if (!success) { @@ -2825,6 +2877,23 @@ bool HloParser::ParsePrecisionList( parse_and_add_item); } +// shapelist ::= '{' shapes '}' +// precision_elements +// ::= /*empty*/ +// ::= shape (',' shape)* +bool HloParser::ParseShapeList(std::vector* result) { + auto parse_and_add_item = [&]() { + Shape shape; + if (!ParseShape(&shape)) { + return false; + } + result->push_back(std::move(shape)); + return true; + }; + return ParseList(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma, + parse_and_add_item); +} + // int64list ::= start int64_elements end // int64_elements // ::= /*empty*/ @@ -2832,23 +2901,15 @@ bool HloParser::ParsePrecisionList( bool HloParser::ParseInt64List(const TokKind start, const TokKind end, const TokKind delim, std::vector* result) { - if (!ParseToken(start, StrCat("expects an int64 list starting with ", - TokKindToString(start)))) { - return false; - } - if (lexer_.GetKind() == end) { - // empty - } else { - do { - tensorflow::int64 i; - if (!ParseInt64(&i)) { - return false; - } - result->push_back(i); - } while (EatIfPresent(delim)); - } - return ParseToken( - end, StrCat("expects an int64 list to end with ", TokKindToString(end))); + auto parse_and_add_item = [&]() { + tensorflow::int64 i; + if (!ParseInt64(&i)) { + return false; + } + result->push_back(i); + return true; + }; + return ParseList(start, end, delim, parse_and_add_item); } bool HloParser::ParseList(const TokKind start, const TokKind end, diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 255123d331..17538c05bc 100644 --- 
a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -802,6 +802,43 @@ ENTRY %ConstantUnsignedNoOverflow () -> u64[] { ROOT %constant = u64[] constant(9223372036854775807) } +)" +}, +// CustomCallWithLayoutConstraints +{ +"CustomCallWithLayoutConstraints", +R"(HloModule CustomCallWithLayoutConstraints + +ENTRY %CustomCallWithLayoutConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[123,4]{1,0}} +} + +)" +}, +// CustomCallWithLayoutConstraintsNoOperands +{ +"CustomCallWithLayoutConstraintsNoOperands", +R"(HloModule CustomCallWithLayoutConstraintsNoOperands + +ENTRY %CustomCallWithLayoutConstraints () -> f32[1,2,3] { + ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(), custom_call_target="baz", operand_layout_constraints={} +} + +)" +}, +// CustomCallWithLayoutConstraintsTupleShapes +{ +"CustomCallWithLayoutConstraintsTupleShapes", +R"(HloModule CustomCallWithLayoutConstraintsTupleShapes + +ENTRY %CustomCallWithLayoutConstraints (p0: (f32[2,2], f32[42,2,3]), p1: f32[123,4]) -> (f32[1,2,3], f32[1,2,3]) { + %p0 = (f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = (f32[1,2,3]{0,2,1}, f32[1,2,3]{1,2,0}) custom-call((f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={(f32[2,2]{1,0}, f32[42,2,3]{2,0,1}), f32[123,4]{1,0}} +} + )" }, }); @@ -2069,5 +2106,35 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { op::Broadcast(), op::Multiply(), op::Add())); } +TEST_F(HloParserTest, CustomCallWrongNumberofOperandConstraints) { + const string original = R"(HloModule CustomCallWrongNumberofOperandConstraints + +ENTRY 
%CustomCallWrongNumberofOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}} +} + +)"; + ExpectHasSubstr(ParseHloString(original).status().error_message(), + "Expected 2 operand layout constraints, 1 given"); +} + +TEST_F(HloParserTest, CustomCallIncompatibleOperandConstraints) { + const string original = R"(HloModule CustomCallIncompatibleOperandConstraints + +ENTRY %CustomCallIncompatibleOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[555,5]{1,0}} +} + +)"; + ExpectHasSubstr(ParseHloString(original).status().error_message(), + "operand 1 is not compatible with operand shape"); +} + +// custom call incompatible shape. 
+ } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 496fe1795d..be3bee5975 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -360,7 +360,27 @@ Status ShapeVerifier::HandleCall(HloInstruction* call) { return CheckShape(call, call->to_apply()->root_instruction()->shape()); } -Status ShapeVerifier::HandleCustomCall(HloInstruction*) { return Status::OK(); } +Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); + TF_RET_CHECK(custom_call != nullptr); + if (custom_call->layout_constrained()) { + // If the layout is constrained, verify all the respective shapes have + // layouts and that the constrained operand shapes match the shapes of the + // operands. + TF_RET_CHECK(LayoutUtil::HasLayout(custom_call->shape())); + TF_RET_CHECK(custom_call->operand_count() == + custom_call->operand_shapes_with_layout().size()); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + const Shape& operand_shape_with_layout = + custom_call->operand_shapes_with_layout()[i]; + TF_RET_CHECK(ShapeUtil::Compatible(custom_call->operand(i)->shape(), + operand_shape_with_layout)); + TF_RET_CHECK(LayoutUtil::HasLayout(operand_shape_with_layout)); + } + } + return Status::OK(); +} Status ShapeVerifier::HandleSlice(HloInstruction* slice) { return CheckShape(slice, diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index cc4a342e9d..ad65b147c1 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -419,6 +419,16 @@ Status LayoutAssignment::BuildHostChannelConstraints( return Status::OK(); } +namespace { + +bool IsLayoutConstrainedCustomCall(HloInstruction* instruction) { + const 
HloCustomCallInstruction* custom_call = + DynCast(instruction); + return custom_call != nullptr && custom_call->layout_constrained(); +} + +} // namespace + Status LayoutAssignment::AddMandatoryConstraints( const ComputationLayout* computation_layout, ChannelLayoutConstraints* channel_constraints, HloComputation* computation, @@ -434,7 +444,6 @@ Status LayoutAssignment::AddMandatoryConstraints( // Constrain layouts of instructions which define values with pre-existing // layouts. for (auto* instruction : computation->instructions()) { - Shape const* shape_with_layout = nullptr; if (instruction->opcode() == HloOpcode::kInfeed) { // Infeed layouts must match the layout of the original inserted // instruction. @@ -456,17 +465,21 @@ Status LayoutAssignment::AddMandatoryConstraints( if (parameter_layout.LayoutIsSet()) { // Parameter layouts must match the respective layout in // ComputationLayout, if there is one. - shape_with_layout = ¶meter_layout.shape(); + TF_RETURN_IF_ERROR(constraints->SetInstructionLayout( + parameter_layout.shape(), instruction)); } } - } - if (shape_with_layout != nullptr) { + } else if (IsLayoutConstrainedCustomCall(instruction)) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); TF_RETURN_IF_ERROR( - constraints->SetInstructionLayout(*shape_with_layout, instruction)); - } - - if (instruction->opcode() == HloOpcode::kSend || - instruction->opcode() == HloOpcode::kRecv) { + constraints->SetInstructionLayout(custom_call->shape(), custom_call)); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + TF_RETURN_IF_ERROR(constraints->SetOperandLayout( + custom_call->operand_shapes_with_layout()[i], custom_call, i)); + } + } else if (instruction->opcode() == HloOpcode::kSend || + instruction->opcode() == HloOpcode::kRecv) { CHECK(get_channel_constraints(instruction)) << "Multi-module layout assignment requires ChannelLayoutConstraints"; int64 channel_id = instruction->channel_id(); @@ -621,31 +634,6 @@ Status 
LayoutAssignment::AddMandatoryConstraints( TF_RETURN_IF_ERROR(constraints->SetOperandLayout( false_computation_layout.parameter_shape(0), instruction, 2, /*mandatory=*/true)); - } else if (instruction->opcode() == HloOpcode::kCustomCall) { - if (!CustomCallRequiresMajorFirstLayout(instruction)) { - continue; - } - // Add constraints for kCustomCall instruction operands and instructions. - // For now we only support major-first layouts for all inputs and outputs. - Shape result_shape = ShapeUtil::MakeShapeWithDescendingLayout( - instruction->shape().element_type(), - AsInt64Slice(instruction->shape().dimensions())); - TF_RETURN_IF_ERROR( - constraints->SetInstructionLayout(result_shape, instruction)); - for (int64 i = 0; i < instruction->operand_count(); ++i) { - const Shape& operand_shape = instruction->operand(i)->shape(); - // Opaque operands don't get a layout constraint. - if (ShapeUtil::IsOpaque(operand_shape)) { - continue; - } - - Shape row_major_operand_shape = - ShapeUtil::MakeShapeWithDescendingLayout( - operand_shape.element_type(), - AsInt64Slice(operand_shape.dimensions())); - TF_RETURN_IF_ERROR(constraints->SetOperandLayout( - row_major_operand_shape, instruction, i)); - } } } // Finally set the result layout to match ComputationLayout, if there is one. @@ -676,16 +664,18 @@ Status CheckCallLayout(HloInstruction* call, return Status::OK(); } -// Custom calls have fixed input and output layouts. -Status CheckCustomCallLayout(HloInstruction* custom_call) { - for (const HloInstruction* operand : custom_call->operands()) { - TF_RET_CHECK( - ShapeUtil::IsOpaque(operand->shape()) || - LayoutUtil::IsMonotonicWithDim0Major(operand->shape().layout())); +// Operands of layout-constrained custom calls must match the expected +// constrained layouts. 
+Status CheckCustomCallLayout(HloInstruction* instruction) { + if (IsLayoutConstrainedCustomCall(instruction)) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( + custom_call->operand(i)->shape(), + custom_call->operand_shapes_with_layout()[i])); + } } - TF_RET_CHECK( - ShapeUtil::IsOpaque(custom_call->shape()) || - LayoutUtil::IsMonotonicWithDim0Major(custom_call->shape().layout())); return Status::OK(); } @@ -932,9 +922,7 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { FindOrDie(computation_layouts_, instruction->to_apply()))); break; case HloOpcode::kCustomCall: - if (CustomCallRequiresMajorFirstLayout(instruction)) { - TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction)); - } + TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction)); break; case HloOpcode::kFusion: TF_RETURN_IF_ERROR(CheckFusionLayout(instruction)); @@ -1554,11 +1542,11 @@ Status LayoutAssignment::CalculateComputationLayout( Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { // Clear existing layouts of the instructions. All layouts must be assigned - // by the LayoutAssignment pass, except for those on infeeds, parameters, - // and the computation result. The latter two are specified in - // computation_layout, so we only need to keep the existing layouts for - // infeeds. Clearing the layouts here avoids hiding potential bugs in the - // layout assignment pass that may accidentally use the existing layout. + // by the LayoutAssignment pass, except for those on parameters, the + // computation result, and a couple special cases. The former two are + // specified in computation_layout. Clearing the layouts here avoids hiding + // potential bugs in the layout assignment pass that may accidentally use the + // existing layout. 
for (HloInstruction* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kBitcast) { // bitcasts are inherently layout sensitive and so a bitcast instruction @@ -1567,7 +1555,9 @@ Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { "Unexpected bitcast operation seen during layout assignment: %s.", instruction->ToString()); } - if (instruction->opcode() != HloOpcode::kInfeed) { + // Some instructions carry mandatory layouts in their shape. + if (instruction->opcode() != HloOpcode::kInfeed && + !IsLayoutConstrainedCustomCall(instruction)) { LayoutUtil::ClearLayout(instruction->mutable_shape()); } } @@ -1802,6 +1792,18 @@ StatusOr LayoutAssignment::Run(HloModule* module) { } TF_RETURN_IF_ERROR(Init()); + // Verify computation layout is sane. + const HloComputation* entry = module->entry_computation(); + TF_RET_CHECK(entry_computation_layout_->parameter_count() == + entry->num_parameters()); + for (int64 i = 0; i < entry->num_parameters(); ++i) { + TF_RET_CHECK( + ShapeUtil::Compatible(entry_computation_layout_->parameter_shape(i), + entry->parameter_instruction(i)->shape())); + } + TF_RET_CHECK(ShapeUtil::Compatible(entry_computation_layout_->result_shape(), + entry->root_instruction()->shape())); + // We do two passes. 
The first one we pass a nullptr ComputationLayout to // the RunOnComputation() calls (for non entry computations), and we register // the ComputationLayout which are naturally flowing in DFS fashion to the @@ -1873,7 +1875,6 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kCrossReplicaSum: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: - case HloOpcode::kCustomCall: case HloOpcode::kDivide: case HloOpcode::kDynamicSlice: case HloOpcode::kDynamicUpdateSlice: @@ -1930,6 +1931,7 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kConstant: case HloOpcode::kConvolution: case HloOpcode::kCopy: + case HloOpcode::kCustomCall: case HloOpcode::kDomain: case HloOpcode::kDot: case HloOpcode::kFusion: diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 2d48e12263..cb56f4cd19 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -333,19 +333,6 @@ class LayoutAssignment : public HloModulePass { const ResultLayoutConstraint& layout_constraint, LayoutConstraints* constraints); - // By default LayoutAssignment ensures that inputs and outputs of CustomCalls - // have the "major-first" layout (i.e. {n, n-1, ..., 0}). - // - // If this function returns true, LayoutAssignment does not set a layout for - // the given CustomCall. It's up to the backend to set one in - // AddBackendConstraints, if necessary. - // - // Precondition: instruction->opcode() == HloOpcode::kCustomCall. - virtual bool CustomCallRequiresMajorFirstLayout( - const HloInstruction* /*instruction*/) { - return true; - } - // Called after layouts of an instruction have been finalized to allow // subclasses to check for platform specific assumptions. 
virtual Status Verify(const HloInstruction* instruction) { diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 2c549cd872..ff6fdb5e4a 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -65,6 +65,27 @@ class LayoutAssignmentTest : public HloVerifiedTestBase { FindInstruction(module, name)->shape().layout().minor_to_major(); return std::vector(minor_to_major.begin(), minor_to_major.end()); } + + void ExpectLayoutIs(const Shape& shape, + absl::Span minor_to_major) { + const Layout expected = LayoutUtil::MakeLayout(minor_to_major); + EXPECT_TRUE(LayoutUtil::Equal(shape.layout(), expected)) + << "Expected layout " << expected << ", actual " << shape.layout(); + } + + void ExpectTupleLayoutIs( + const Shape& shape, + std::initializer_list> minor_to_majors) { + int i = 0; + for (const absl::Span minor_to_major : minor_to_majors) { + const Layout expected = LayoutUtil::MakeLayout(minor_to_major); + const Layout& actual = ShapeUtil::GetTupleElementShape(shape, i).layout(); + EXPECT_TRUE(LayoutUtil::Equal(actual, expected)) + << "Expected tuple element " << i << " layout " << expected + << ", actual " << actual; + ++i; + } + } }; TEST_F(LayoutAssignmentTest, ComputationLayout) { @@ -1102,5 +1123,174 @@ TEST_F(LayoutAssignmentTest, TupleCopyOnLayoutMismatch) { EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0)); } +TEST_F(LayoutAssignmentTest, CustomCallNotLayoutConstrained) { + const char* module_str = R"( +HloModule CustomCallNotLayoutConstrained + +ENTRY %CustomCallWithNotLayoutConstrained (p: f32[42,2,3]) -> f32[1,2,3,4] { + %p = f32[42,2,3] parameter(0) + ROOT %custom-call = f32[1,2,3,4] custom-call(f32[42,2,3] %p), custom_call_target="baz" +} +)"; + // Try with a couple different layouts. In each case the custom calls operand + // and result layout should match that of the computation. 
+ { + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 2, 1})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {3, 2, 0, 1})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ASSERT_THAT(root, op::CustomCall(op::Parameter())); + ExpectLayoutIs(root->shape(), {3, 2, 0, 1}); + ExpectLayoutIs(root->operand(0)->shape(), {0, 2, 1}); + } + { + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 1, 2})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {0, 2, 3, 1})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ASSERT_THAT(root, op::CustomCall(op::Parameter())); + ExpectLayoutIs(root->shape(), {0, 2, 3, 1}); + ExpectLayoutIs(root->operand(0)->shape(), {0, 1, 2}); + } +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrained) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrained + +ENTRY %CustomCallWithLayoutConstraints (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] { + %p0 = f32[4,4] parameter(0) + %p1 = f32[2,3] parameter(1) + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(f32[4,4] %p0, f32[2,3] %p1), custom_call_target="baz", operand_layout_constraints={f32[4,4]{0,1}, f32[2,3]{1,0}} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + 
std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_parameter_layout(1) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + // The custom call should be partially encapsulated in kCopy instructions + // because of the layout mismatches. + ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall(op::Copy(), op::Parameter()))); + + const HloInstruction* custom_call = + module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); + ExpectLayoutIs(custom_call->operand(0)->shape(), {0, 1}); + ExpectLayoutIs(custom_call->operand(1)->shape(), {1, 0}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedZeroOperands) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedZeroOperands + +ENTRY %CustomCallLayoutConstrainedZeroOperands () -> f32[1,2,3,4] { + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(), custom_call_target="baz", operand_layout_constraints={} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall())); + + const HloInstruction* custom_call = + 
module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleOperand) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedTupleOperand + +ENTRY %CustomCallLayoutConstrainedTupleOperand (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] { + %p0 = f32[4,4] parameter(0) + %p1 = f32[2,3] parameter(1) + %tuple = (f32[4,4], f32[2,3]) tuple(%p0, %p1) + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(%tuple), custom_call_target="baz", operand_layout_constraints={(f32[4,4]{1,0}, f32[2,3]{0,1})} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_parameter_layout(1) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ExpectLayoutIs(root->shape(), {2, 1, 0, 3}); + + ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall(op::Tuple()))); + + const HloInstruction* custom_call = + module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); + ExpectTupleLayoutIs(custom_call->operand(0)->shape(), {{1, 0}, {0, 1}}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleResult) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedTupleResult + +ENTRY %CustomCallLayoutConstrainedTupleResult (p0: f32[4,4]) -> (f32[4,4]{1,0}, f32[2,3]{0,1}) { + %p0 = f32[4,4] 
parameter(0) + ROOT %custom-call = (f32[4,4]{1,0}, f32[2,3]{0,1}) custom-call(%p0), custom_call_target="baz", operand_layout_constraints={f32[4,4]{1,0}} +} +)"; + // Try with a couple different layouts. In each case the custom calls operand + // and result layout should match that of the computation. + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_result_layout() = + ShapeLayout(ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}), + ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})})); + AssignLayouts(module.get(), &computation_layout); + + ExpectTupleLayoutIs(module->entry_computation()->root_instruction()->shape(), + {{1, 0}, {1, 0}}); + + const HloInstruction* custom_call = + FindInstruction(module.get(), "custom-call"); + ExpectTupleLayoutIs(custom_call->shape(), {{1, 0}, {0, 1}}); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index d244923532..7f0201942b 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -1645,7 +1645,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, } std::ostream& operator<<(std::ostream& out, const Shape& shape) { - out << ShapeUtil::HumanString(shape); + out << ShapeUtil::HumanStringWithLayout(shape); return out; } diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc index a693fa3595..001490c6a8 100644 --- a/tensorflow/compiler/xla/tests/custom_call_test.cc +++ b/tensorflow/compiler/xla/tests/custom_call_test.cc @@ -105,8 +105,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR2F32Reduce)) { 
LiteralTestUtil::ExpectR0Near(10.0f, result, error_spec_); } -XLA_TEST_F(CustomCallTest, - DISABLED_ON_GPU(CustomCall_UsedInOtherComputations)) { +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(UsedInOtherComputations)) { auto module = CreateNewModule(); auto b = HloComputation::Builder(TestName()); @@ -130,6 +129,53 @@ XLA_TEST_F(CustomCallTest, Array3D{{{2, 3}, {4, 5}}, {{3, 4}, {5, 6}}}, result); } +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(InputAndOutputLayoutDiffer)) { + auto module = CreateNewModule(); + auto b = HloComputation::Builder(TestName()); + + auto input = + b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p")); + b.AddInstruction( + HloInstruction::CreateCustomCall(r2f32_, {input}, "Add1ToValues")); + + module->AddEntryComputation(b.Build()); + ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0})); + ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1})); + + Literal argument = LiteralUtil::CreateR2({{1.f, 2.f}, {3.f, 4.f}}); + + // Note, the expected result is transposed! This is because the input and + // output layouts of the custom call differ and the called function just + // blindly adds one to each element. + Literal result = ExecuteAndTransfer(std::move(module), {&argument}); + LiteralTestUtil::ExpectR2Equal({{2.f, 4.f}, {3.f, 5.f}}, result); +} + +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(LayoutConstrained)) { + // The argument and result of the computation are set to different layouts, + // but the custom call is layout constrained to a fixed operand and result + // layout, so the correct result should be produced. 
+ auto module = CreateNewModule(); + auto b = HloComputation::Builder(TestName()); + + auto input = + b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p")); + + const Shape& r2f32_dim0_major = + ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0}); + b.AddInstruction(HloInstruction::CreateCustomCall( + r2f32_dim0_major, {input}, "Add1ToValues", {r2f32_dim0_major})); + + module->AddEntryComputation(b.Build()); + ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0})); + ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1})); + + Literal argument = LiteralUtil::CreateR2({{1.f, 2.f}, {3.f, 4.f}}); + + Literal result = ExecuteAndTransfer(std::move(module), {&argument}); + LiteralTestUtil::ExpectR2Equal({{2.f, 3.f}, {4.f, 5.f}}, result); +} + class CustomCallClientAPITest : public ClientLibraryTestBase {}; // When using the client API, CustomCall targets can't begin with '$' -- these -- GitLab From af5b714179ff5e279ba27c024f453e2d75636ac9 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 8 Oct 2018 14:43:55 -0700 Subject: [PATCH 083/411] Add more logging to the convolution transformations. 
PiperOrigin-RevId: 216252980 --- .../xla/service/gpu/cudnn_convolution_algorithm_picker.cc | 3 +++ .../compiler/xla/service/gpu/cudnn_convolution_rewriter.cc | 3 +++ .../xla/service/gpu/cudnn_fused_convolution_rewriter.cc | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index 590c0a7d54..6d4a72038f 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -360,6 +360,9 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( HloInstruction* new_call = computation->AddInstruction( instr->CloneWithNewOperands(new_call_shape, instr->operands())); + VLOG(1) << "Replacing convolution " << instr->ToString() << " with " + << new_call->ToString(); + TF_RETURN_IF_ERROR(new_call->set_backend_config(backend_config)); // Repackage new_call so it has the same shape as the original call, namely diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc index ef29237301..437d25727e 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc @@ -525,6 +525,9 @@ StatusOr RunOnInstruction(HloInstruction* conv) { TF_RETURN_IF_ERROR( custom_call->set_backend_config(GetDefaultBackendConfig())); + VLOG(1) << "Replacing convolution " << conv->ToString() << " with " + << custom_call->ToString(); + // The CustomCall returns a tuple (conv_result, scratch_memory). Extract out // the conv result and replace `conv` with it. 
TF_RETURN_IF_ERROR(conv->parent()->ReplaceWithNewInstruction( diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc index 3761c19cfc..d508cbc2e1 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc @@ -234,7 +234,8 @@ StatusOr> TryRewriteToCudnnForwardRelu( config.set_side_input_scale(alpha_side_input); TF_RETURN_IF_ERROR(new_conv->set_backend_config(config)); - VLOG(1) << "Rewriting " << conv->name() << " to " << new_conv->name(); + VLOG(1) << "Replacing convolution " << conv->ToString() << " with " + << new_conv->ToString(); return HloInstruction::CreateGetTupleElement(conv->shape().tuple_shapes(0), new_conv, 0); } -- GitLab From b3bd7b378d00190fef831092836a5df62e39e7ed Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Mon, 8 Oct 2018 14:44:37 -0700 Subject: [PATCH 084/411] Ignore args and kwargs for defun's get_concrete_fn if `PolymorphicFunction` was created with an input_signature. 
PiperOrigin-RevId: 216253122 --- tensorflow/python/eager/function.py | 14 ++++++++++++++ tensorflow/python/eager/function_test.py | 9 ++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 99bf375ea7..ff138cad1e 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -664,6 +664,11 @@ class Function(object): return self._build_call_outputs(outputs) + @property + def name(self): + """Function name.""" + return self._inference_function.name + @property def graph(self): """Returns the graph from which this function was constructed.""" @@ -721,6 +726,10 @@ class Function(object): return nest.map_structure(lambda x: x.dtype if x is not None else None, self._func_graph.structured_outputs) + def add_to_graph(self, g): + """Adds this function into the graph g.""" + return self._inference_function.add_to_graph(g) + def _construct_backprop_function(self): """Constructs the backprop function object for this function.""" backwards_graph = FuncGraph(_backward_name(self._func_graph.name)) @@ -1133,6 +1142,8 @@ class PolymorphicFunction(object): *args: inputs to specialize on. **kwargs: inputs to specialize on. """ + if self._input_signature: + args, kwargs = None, None graph_function, _ = self._maybe_define_function(args, kwargs) return graph_function @@ -1322,6 +1333,9 @@ def register(func, *args, **kwargs): function definition into graph. Register function with different input param will result into multiple version of functions registered in graph. + Also, `args` and `kwargs` are ignored if this `PolymorphicFunction` was + created with an `input_signature`. + Args: func: the PolymorphicFunction instance that generated by a @defun *args: input arguments for the Python function. 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index e46bde098b..953f4300cf 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1841,11 +1841,10 @@ class FunctionTest(test.TestCase): # pylint: disable=protected-access self.assertEqual(len(graph._functions), 3) - # Test input param shape mismatch - t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) - with self.assertRaisesRegexp( - ValueError, 'Python inputs incompatible with input_signature'): - function.register(defun_matmul, t2, t2) + # Test register function with cache, note inputs are ignored. + function.register(defun_matmul) + graph = ops.get_default_graph() + self.assertEqual(len(graph._functions), 3) def testRegisterFunctionWithCache(self): def matmul(x, y): -- GitLab From 220c0f90af05ed1ca86831258888cc80757654fd Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 8 Oct 2018 15:00:36 -0700 Subject: [PATCH 085/411] [XLA] Simplify loop nesting in HandleConvolution The calculation of a spatial coordinate in the kernel and activations is not dependent on which part of the contracted dimension (input feature) we are in. Rather than nesting the loops, the loops can be siblings: - One loop over spatial dimensions - One loop over the input feature group This reduces the nesting depth which makes the code a little more readable and might be slightly faster due work invariant in the spatial loop getting hoisted out. 
PiperOrigin-RevId: 216255839 --- .../xla/service/hlo_evaluator_typed_visitor.h | 96 +++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index a450dc6ff5..84fbbd3e0c 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -1072,66 +1072,66 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // Convolve input feature with kernel. do { + // Find corresponding spatial dimension index for input (lhs). + int64 lhs_linear_spatial_index = 0; + int64 rhs_linear_spatial_index = 0; + for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { + // Spatial dimension number for input (lhs) and output. + const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); + const int64 output_spatial_dim = dnums.output_spatial_dimensions(ki); + + // Calculate lhs (input) index without taking base dilation into + // account. + const auto& window_dim = window.dimensions(ki); + const int64 undilated_index = + out_index[output_spatial_dim] * window_dim.stride() - + window_dim.padding_low() + + rhs_spatial_index[ki] * window_dim.window_dilation(); + // Skip if the lhs (input) index is to be dilated. As an + // optimization, skip this mod if there's no dilation. + if (window_dim.base_dilation() > 1 && + undilated_index % window_dim.base_dilation() != 0) { + goto cnt; + } + + // Calculate the actual lhs (input) index after dilation. As an + // optimization, skip this integer divide if there's no dilation. + int64 lhs_spatial_index; + if (window_dim.base_dilation() > 1) { + lhs_spatial_index = undilated_index / window_dim.base_dilation(); + } else { + lhs_spatial_index = undilated_index; + } + + // Skip if input index is not in bounds. 
+ if (!(lhs_spatial_index >= 0 && + lhs_spatial_index < lhs_shape.dimensions(input_spatial_dim))) { + goto cnt; + } + + lhs_linear_spatial_index += + lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; + rhs_linear_spatial_index += + (window_dim.window_reversal() + ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) + : rhs_spatial_index[ki]) * + rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; + } + for (int64 rhs_iz = 0; rhs_iz < input_feature_group_size; ++rhs_iz) { const int64 iz = feature_group_index * input_feature_group_size + rhs_iz; - int64 lhs_linear_index = 0; + int64 lhs_linear_index = lhs_linear_spatial_index; lhs_linear_index += out_index[output_batch_dim] * lhs_dim_multipliers[input_batch_dim]; lhs_linear_index += iz * lhs_dim_multipliers[input_z_dim]; - int64 rhs_linear_index = 0; + int64 rhs_linear_index = rhs_linear_spatial_index; rhs_linear_index += out_index[output_z_dim] * rhs_dim_multipliers[kernel_output_z_dim]; rhs_linear_index += rhs_iz * rhs_dim_multipliers[kernel_input_z_dim]; - // Find corresponding spatial dimension index for input (lhs). - for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { - // Spatial dimension number for input (lhs) and output. - const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); - const int64 output_spatial_dim = - dnums.output_spatial_dimensions(ki); - - // Calculate lhs (input) index without taking base dilation into - // account. - const auto& window_dim = window.dimensions(ki); - const int64 undilated_index = - out_index[output_spatial_dim] * window_dim.stride() - - window_dim.padding_low() + - rhs_spatial_index[ki] * window_dim.window_dilation(); - // Skip if the lhs (input) index is to be dilated. As an - // optimization, skip this mod if there's no dilation. - if (window_dim.base_dilation() > 1 && - undilated_index % window_dim.base_dilation() != 0) { - goto cnt; - } - - // Calculate the actual lhs (input) index after dilation. 
As an - // optimization, skip this integer divide if there's no dilation. - int64 lhs_spatial_index; - if (window_dim.base_dilation() > 1) { - lhs_spatial_index = undilated_index / window_dim.base_dilation(); - } else { - lhs_spatial_index = undilated_index; - } - lhs_linear_index += - lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; - - // Skip if input index is not in bounds. - if (!(lhs_spatial_index >= 0 && - lhs_spatial_index < - lhs_shape.dimensions(input_spatial_dim))) { - goto cnt; - } - - rhs_linear_index += - (window_dim.window_reversal() - ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) - : rhs_spatial_index[ki]) * - rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; - } - result_val += static_cast(lhs_literal_data[lhs_linear_index]) * static_cast(rhs_literal_data[rhs_linear_index]); -- GitLab From 5da3cebe00111aa43e34b5a3fc12d1a97b838ba7 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 8 Oct 2018 15:02:13 -0700 Subject: [PATCH 086/411] Automated rollback of commit 09b0fc199129e0f487a39741bdf674cf09035cbc PiperOrigin-RevId: 216256115 --- .../core/kernels/data/shuffle_dataset_op.cc | 2 +- .../data/experimental/kernel_tests/BUILD | 13 ------ .../kernel_tests/random_dataset_test.py | 45 ------------------- .../kernel_tests/shuffle_and_repeat_test.py | 21 +-------- .../data/experimental/ops/random_ops.py | 21 ++------- .../data/experimental/ops/shuffle_ops.py | 21 ++------- tensorflow/python/data/kernel_tests/BUILD | 1 - .../kernel_tests/shuffle_dataset_op_test.py | 25 +---------- tensorflow/python/data/ops/dataset_ops.py | 22 ++------- tensorflow/python/data/util/BUILD | 1 - tensorflow/python/data/util/random_seed.py | 5 +-- .../python/data/util/random_seed_test.py | 13 +----- 12 files changed, 16 insertions(+), 174 deletions(-) delete mode 100644 tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc 
b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 9f54c381a9..66466d6a36 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase { int64 buffer_size, int64 seed, int64 seed2, int64 count) : ShuffleDatasetBase(ctx, input, buffer_size, count), seed_(seed), - seed2_(seed2) {} + seed2_(seed) {} string DebugString() const override { return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_, diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD index a67f6ff031..4eef9580ad 100644 --- a/tensorflow/python/data/experimental/kernel_tests/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/BUILD @@ -453,18 +453,6 @@ cuda_py_test( tags = ["no_windows_gpu"], ) -py_test( - name = "random_dataset_test", - srcs = ["random_dataset_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python/data/experimental/ops:random_ops", - "//tensorflow/python/data/kernel_tests:test_base", - "//tensorflow/python/data/ops:dataset_ops", - "@absl_py//absl/testing:parameterized", - ], -) - py_library( name = "reader_dataset_ops_test_base", testonly = 1, @@ -574,7 +562,6 @@ py_test( "//tensorflow/python/data/kernel_tests:test_base", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py deleted file mode 100644 index d403a575ec..0000000000 --- a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for `tf.data.experimental.RandomDataset()`.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.data.experimental.ops import random_ops -from tensorflow.python.data.kernel_tests import test_base -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import errors - - -class RandomDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): - - @parameterized.named_parameters( - ("NoSeed", None), - ("WithSeed", 42), - ) - def testZipRandomDataset(self, seed): - dataset = random_ops.RandomDataset(seed=seed).take(30) - dataset = dataset_ops.Dataset.zip((dataset, dataset)) - iterator = dataset.make_one_shot_iterator() - next_element = iterator.get_next() - - with self.cached_session() as sess: - for _ in range(30): - x, y = sess.run(next_element) - self.assertEqual(x, y) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py index 883169495f..c208963a86 100644 --- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py +++ 
b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py @@ -17,7 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from absl.testing import parameterized import numpy as np from tensorflow.python.data.experimental.ops import shuffle_ops @@ -28,7 +27,7 @@ from tensorflow.python.framework import ops from tensorflow.python.platform import test -class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase): +class ShuffleAndRepeatTest(test_base.DatasetTestBase): def _build_ds(self, seed, count=5, num_elements=20): return dataset_ops.Dataset.range(num_elements).apply( @@ -111,24 +110,6 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase): with self.session(graph=g) as sess: sess.run(get_next_op) - @parameterized.named_parameters( - ("NoSeed", None), - ("WithSeed", 42), - ) - def testShuffleAndRepeatAndZipDataset(self, seed): - dataset = dataset_ops.Dataset.range(10).apply( - shuffle_ops.shuffle_and_repeat(10, count=3, seed=seed)) - dataset = dataset_ops.Dataset.zip((dataset, dataset)) - iterator = dataset.make_one_shot_iterator() - next_element = iterator.get_next() - - with self.cached_session() as sess: - for _ in range(30): - x, y = sess.run(next_element) - self.assertEqual(x, y) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py index 25d7fbf691..e3a2aeab31 100644 --- a/tensorflow/python/data/experimental/ops/random_ops.py +++ b/tensorflow/python/data/experimental/ops/random_ops.py @@ -33,26 +33,13 @@ class RandomDataset(dataset_ops.DatasetSource): def __init__(self, seed=None): """A `Dataset` of pseudorandom values.""" super(RandomDataset, self).__init__() - - # NOTE(mrry): We generate the seed-pair once per graph in which the dataset - # is 
iterated over, and cache it in `self._graph_seed_map`. This supports - # two features: iterating over the same `ShuffleDataset` twice in the same - # pipeline and observing the same order (by tying the seeds together with - # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`, - # which requires the stateful RNG op to be created inside the same graph as - # the dataset. - self._original_seed = seed - self._graph_seed_map = {} + self._seed, self._seed2 = random_seed.get_seed(seed) def _as_variant_tensor(self): - try: - seed, seed2 = self._graph_seed_map[ops.get_default_graph()] - except KeyError: - seed, seed2 = random_seed.get_seed(self._original_seed) - self._graph_seed_map[ops.get_default_graph()] = (seed, seed2) - return gen_dataset_ops.random_dataset( - seed=seed, seed2=seed2, **dataset_ops.flat_structure(self)) + seed=self._seed, + seed2=self._seed2, + **dataset_ops.flat_structure(self)) @property def output_classes(self): diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py index a82e4b7d09..a4307212da 100644 --- a/tensorflow/python/data/experimental/ops/shuffle_ops.py +++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py @@ -39,32 +39,17 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset): else: self._count = ops.convert_to_tensor( count, dtype=dtypes.int64, name="count") - - # NOTE(mrry): We generate the seed-pair once per graph in which the dataset - # is iterated over, and cache it in `self._graph_seed_map`. This supports - # two features: iterating over the same `ShuffleDataset` twice in the same - # pipeline and observing the same order (by tying the seeds together with - # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`, - # which requires the stateful RNG op to be created inside the same graph as - # the dataset. 
- self._original_seed = seed - self._graph_seed_map = {} + self._seed, self._seed2 = random_seed.get_seed(seed) def _as_variant_tensor(self): - try: - seed, seed2 = self._graph_seed_map[ops.get_default_graph()] - except KeyError: - seed, seed2 = random_seed.get_seed(self._original_seed) - self._graph_seed_map[ops.get_default_graph()] = (seed, seed2) - # pylint: disable=protected-access input_resource = self._input_dataset._as_variant_tensor() return gen_dataset_ops.shuffle_and_repeat_dataset( input_resource, buffer_size=self._buffer_size, count=self._count, - seed=seed, - seed2=seed2, + seed=self._seed, + seed2=self._seed2, **dataset_ops.flat_structure(self)) # pylint: enable=protected-access diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index ecb24103b3..c7295d6e69 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -443,7 +443,6 @@ tf_py_test( srcs = ["shuffle_dataset_op_test.py"], additional_deps = [ ":test_base", - "@absl_py//absl/testing:parameterized", "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py index 6001721726..347af18576 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py @@ -19,7 +19,6 @@ from __future__ import print_function import collections -from absl.testing import parameterized import numpy as np from tensorflow.python.data.kernel_tests import test_base @@ -32,7 +31,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.platform import test -class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): +class ShuffleDatasetTest(test_base.DatasetTestBase): def testShuffleDataset(self): components = ( @@ -210,27 +209,5 @@ class 
ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) - @parameterized.named_parameters( - ("ReshuffleEachIterationNoSeed", None, True), - ("ReshuffleEachIterationWithSeed", 42, True), - ("NoReshuffleEachIterationNoSeed", None, False), - ("NoReshuffleEachIterationWithSeed", 42, False), - ) - def testShuffleAndZipDataset(self, seed, reshuffle): - dataset = (dataset_ops.Dataset.range(10) - .shuffle(10, seed=seed, reshuffle_each_iteration=reshuffle) - .repeat(3)) - dataset = dataset_ops.Dataset.zip((dataset, dataset)) - iterator = dataset.make_one_shot_iterator() - next_element = iterator.get_next() - - with self.cached_session() as sess: - for _ in range(30): - x, y = sess.run(next_element) - self.assertEqual(x, y) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 2d036fd0d6..b7e19055f2 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -2254,34 +2254,18 @@ class ShuffleDataset(UnaryDataset): self._input_dataset = input_dataset self._buffer_size = ops.convert_to_tensor( buffer_size, dtype=dtypes.int64, name="buffer_size") - - # NOTE(mrry): We generate the seed-pair once per graph in which the dataset - # is iterated over, and cache it in `self._graph_seed_map`. This supports - # two features: iterating over the same `ShuffleDataset` twice in the same - # pipeline and observing the same order (by tying the seeds together with - # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`, - # which requires the stateful RNG op to be created inside the same graph as - # the dataset. 
- self._original_seed = seed - self._graph_seed_map = {} - + self._seed, self._seed2 = random_seed.get_seed(seed) if reshuffle_each_iteration is None: self._reshuffle_each_iteration = True else: self._reshuffle_each_iteration = reshuffle_each_iteration def _as_variant_tensor(self): - try: - seed, seed2 = self._graph_seed_map[ops.get_default_graph()] - except KeyError: - seed, seed2 = random_seed.get_seed(self._original_seed) - self._graph_seed_map[ops.get_default_graph()] = (seed, seed2) - return gen_dataset_ops.shuffle_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access buffer_size=self._buffer_size, - seed=seed, - seed2=seed2, + seed=self._seed, + seed2=self._seed2, reshuffle_each_iteration=self._reshuffle_each_iteration, **flat_structure(self)) diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index 95bf3209d7..39082ce370 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -142,7 +142,6 @@ py_test( ":random_seed", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:random_ops", "//tensorflow/python:util", ], ) diff --git a/tensorflow/python/data/util/random_seed.py b/tensorflow/python/data/util/random_seed.py index d24df6d957..d5169f7a53 100644 --- a/tensorflow/python/data/util/random_seed.py +++ b/tensorflow/python/data/util/random_seed.py @@ -24,7 +24,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops def get_seed(seed): @@ -38,7 +37,7 @@ def get_seed(seed): Returns: A tuple of two `tf.int64` scalar tensors that should be used for the local - seeds of the calling dataset. + seed of the calling dataset. 
""" seed, seed2 = random_seed.get_seed(seed) if seed is None: @@ -46,7 +45,7 @@ def get_seed(seed): else: seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") if seed2 is None: - seed2 = random_ops.random_uniform([], 1, 2**63 - 1, dtype=dtypes.int64) + seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") else: with ops.name_scope("seed2") as scope: seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64) diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py index 5df2e38c62..a809151e6e 100644 --- a/tensorflow/python/data/util/random_seed_test.py +++ b/tensorflow/python/data/util/random_seed_test.py @@ -41,6 +41,7 @@ class RandomSeedTest(test.TestCase): # (input_graph_seed, input_op_seed) # and output from get_seed: # (output_graph_seed, output_op_seed) + ((None, None), (0, 0)), ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)), ((1, 1), (1, 1)), ((0, 0), (0, 2**31 - 1)), # Avoid nondeterministic (0, 0) output @@ -77,18 +78,6 @@ class RandomSeedTest(test.TestCase): self.assertEqual((g_seed, op_seed), toutput, msg=msg) random_seed.set_random_seed(None) - @test_util.run_in_graph_and_eager_modes - def testNondeterministicRandomSeed(self): - random_seed.set_random_seed(None) - op_seeds = [] - for _ in range(50): - g_seed, op_seed = data_random_seed.get_seed(None) - g_seed = self.evaluate(g_seed) - op_seed = self.evaluate(op_seed) - self.assertEqual(0, g_seed) - self.assertNotEqual(0, op_seed) - op_seeds.append(op_seed) - self.assertGreater(len(set(op_seeds)), 1) if __name__ == '__main__': test.main() -- GitLab From b055d78b0edbf117ec5f7f2662d3bb2781ae02b3 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Mon, 8 Oct 2018 15:09:57 -0700 Subject: [PATCH 087/411] Fix issue with type inference for ops with fixed output types Use the ArgDef::type field when available for propagating the output types from a given unsupported operator. 
PiperOrigin-RevId: 216257741 --- tensorflow/contrib/lite/toco/import_tensorflow.cc | 7 +++++-- .../contrib/lite/toco/import_tensorflow_test.cc | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 133ef79a34..32f22e1ea0 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1151,11 +1151,14 @@ tensorflow::Status ConvertUnsupportedOperator( op->output_data_types.push_back(ConvertDataType(output_type)); } else if (op_def != nullptr) { for (const auto& output_arg : op_def->output_arg()) { - if (HasAttr(node, output_arg.type_attr())) { + if (output_arg.type() != tensorflow::DT_INVALID) { + op->output_data_types.push_back(ConvertDataType(output_arg.type())); + } else if (HasAttr(node, output_arg.type_attr())) { op->output_data_types.push_back( ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr()))); } else { - LOG(INFO) << "Op node missing output type attribute: " << node.name(); + LOG(WARNING) << "Op node missing output type attribute: " + << node.name(); op->output_data_types.clear(); break; } diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc index 8a236d4444..cd9a144b52 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc @@ -235,6 +235,21 @@ TEST_P(TypeImportTest, BasicTypeInference) { INSTANTIATE_TEST_CASE_P(BasicTypeInference, TypeImportTest, ::testing::ValuesIn(UnaryTestTypes())); +TEST(ImportTest, TypeInferenceWithFixedOutputType) { + // Create an op that has a fixed output type (bool). 
+ Model model; + EXPECT_TRUE(ImportNode(BuildNode("IsFinite", {{1, 2}, {2, 3}}), &model).ok()); + ASSERT_THAT(model.operators.size(), ::testing::Ge(1)); + ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported); + const TensorFlowUnsupportedOperator* op = + static_cast( + model.operators[0].get()); + + // The static output type should be indicated in the imported op. + ASSERT_THAT(op->output_data_types, + ::testing::ElementsAre(ArrayDataType::kBool)); +} + TEST(ImportTest, FailedTypeInference) { // Create a unary op with no Type ("T") annotation. NodeDef node; -- GitLab From 0b13d0806b061deaec0e96cfdca1ae4509174f89 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Mon, 8 Oct 2018 15:24:56 -0700 Subject: [PATCH 088/411] Simple comment fix in CheckpointInputPipelineHook. PiperOrigin-RevId: 216260216 --- tensorflow/python/data/experimental/ops/iterator_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/data/experimental/ops/iterator_ops.py b/tensorflow/python/data/experimental/ops/iterator_ops.py index 72d7d58f06..5eb2563977 100644 --- a/tensorflow/python/data/experimental/ops/iterator_ops.py +++ b/tensorflow/python/data/experimental/ops/iterator_ops.py @@ -198,7 +198,7 @@ class CheckpointInputPipelineHook(session_run_hook.SessionRunHook): # is run *after* this hook. That is troublesome because # 1. If a checkpoint exists and this hook restores it, the initializer hook # will override it. - # 2. If no checkpoint exists, this hook will try to save an initialized + # 2. If no checkpoint exists, this hook will try to save an uninitialized # iterator which will result in an exception. # # As a temporary fix we enter the following implicit contract between this -- GitLab From a991acba07ce6c5903ee84e4a72d3d59e22b77fc Mon Sep 17 00:00:00 2001 From: Michael Case Date: Mon, 8 Oct 2018 15:26:34 -0700 Subject: [PATCH 089/411] Internal Change. 
PiperOrigin-RevId: 216260437 --- tensorflow/contrib/__init__.py | 8 -------- tensorflow/python/__init__.py | 7 ------- 2 files changed, 15 deletions(-) diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index e71b0e0ae3..f52a1a7bab 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -21,14 +21,6 @@ from __future__ import print_function import os -from tensorflow.python.tools import component_api_helper -component_api_helper.package_hook( - parent_package_str=( - "tensorflow.contrib"), - child_package_str=( - "tensorflow_estimator.contrib.estimator")) -del component_api_helper - # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import autograph from tensorflow.contrib import batching diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index 4921ecc43c..a2ab63bb48 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -48,13 +48,6 @@ import numpy as np from tensorflow.python import pywrap_tensorflow -from tensorflow.python.tools import component_api_helper -component_api_helper.package_hook( - parent_package_str='tensorflow.python', - child_package_str=( - 'tensorflow_estimator.python.estimator')) -del component_api_helper - # Protocol buffers from tensorflow.core.framework.graph_pb2 import * from tensorflow.core.framework.node_def_pb2 import * -- GitLab From eb0f862ba60f41e8d0f06ceb6fc65f7f9905a25a Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 8 Oct 2018 15:27:40 -0700 Subject: [PATCH 090/411] Automated rollback of commit 13b47e6c4f9d7b295948b1057139bf676e394b6f PiperOrigin-RevId: 216260575 --- tensorflow/core/kernels/data/iterator_ops.cc | 4 +++ .../kernels/data/map_and_batch_dataset_op.cc | 9 +++---- .../core/kernels/data/model_dataset_op.cc | 10 +++---- .../data/parallel_interleave_dataset_op.cc | 27 ++++++++----------- .../kernels/data/parallel_map_iterator.cc | 9 +++---- .../core/kernels/data/prefetch_dataset_op.cc 
| 10 +++---- tensorflow/core/kernels/data/writer_ops.cc | 12 ++++----- 7 files changed, 37 insertions(+), 44 deletions(-) diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index 8acd6cc724..7a833668ac 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -16,8 +16,10 @@ limitations under the License. #include "tensorflow/core/common_runtime/graph_runner.h" #include "tensorflow/core/common_runtime/renamed_device.h" +#include "tensorflow/core/common_runtime/threadpool_device.h" #include "tensorflow/core/framework/iterator.pb.h" #include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/resource_op_kernel.h" #include "tensorflow/core/framework/stats_aggregator.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/variant_op_registry.h" @@ -25,11 +27,13 @@ limitations under the License. #include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/optional_ops.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/public/session_options.h" namespace tensorflow { namespace data { diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index 0fb721cd7c..f45a239793 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -445,10 +445,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { auto ctx_copy = std::make_shared(*ctx); - runner_thread_ = - 
MakeUnique(ctx->env(), "runner_thread"); - runner_thread_->Schedule( - std::bind(&Iterator::RunnerThread, this, ctx_copy)); + runner_thread_.reset(ctx->env()->StartThread( + {}, "runner_thread", + std::bind(&Iterator::RunnerThread, this, ctx_copy))); } } @@ -704,7 +703,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr input_impl_; // Buffer for storing the (intermediate) batch results. std::deque> batch_results_ GUARDED_BY(*mu_); - std::unique_ptr runner_thread_ GUARDED_BY(*mu_); + std::unique_ptr runner_thread_ GUARDED_BY(*mu_); bool cancelled_ GUARDED_BY(*mu_) = false; }; diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc index 859df57962..9aa505f4f1 100644 --- a/tensorflow/core/kernels/data/model_dataset_op.cc +++ b/tensorflow/core/kernels/data/model_dataset_op.cc @@ -18,7 +18,6 @@ limitations under the License. #include "tensorflow/core/kernels/data/dataset.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/cpu_info.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -127,10 +126,9 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (!optimize_thread_) { std::shared_ptr new_ctx(new IteratorContext(*ctx)); - optimize_thread_ = - MakeUnique(ctx->env(), "optimize_thread"); - optimize_thread_->Schedule( - [this, new_ctx]() { OptimizeThread(new_ctx); }); + optimize_thread_.reset(ctx->env()->StartThread( + {}, "optimize_thread", + [this, new_ctx]() { OptimizeThread(new_ctx); })); } return Status::OK(); } @@ -169,7 +167,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { mutex mu_; condition_variable cond_var_; std::shared_ptr model_; - std::unique_ptr optimize_thread_ GUARDED_BY(mu_); + std::unique_ptr optimize_thread_ GUARDED_BY(mu_); bool cancelled_ GUARDED_BY(mu_) = false; std::unique_ptr input_impl_ GUARDED_BY(mu_); }; diff --git 
a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc index 9c836b836e..6b6b3d6ab9 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc @@ -26,7 +26,6 @@ limitations under the License. #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -482,10 +481,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { worker_threads_.reserve(dataset()->num_threads()); for (size_t i = 0; i < dataset()->num_threads(); ++i) { std::shared_ptr new_ctx(new IteratorContext(*ctx)); - worker_threads_.emplace_back( - MakeUnique(ctx->env(), "worker_thread")); - worker_threads_.back()->Schedule( - [this, new_ctx, i]() { WorkerThread(new_ctx, i); }); + worker_threads_.emplace_back(ctx->env()->StartThread( + {}, "worker_thread", + [this, new_ctx, i]() { WorkerThread(new_ctx, i); })); } } return Status::OK(); @@ -582,10 +580,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { } workers_[i].SetInputs(s, std::move(args)); std::shared_ptr new_ctx(new IteratorContext(*ctx)); - worker_threads_.emplace_back( - MakeUnique(ctx->env(), "worker_thread")); - worker_threads_.back()->Schedule( - [this, new_ctx, i]() { WorkerThread(new_ctx, i); }); + worker_threads_.emplace_back(ctx->env()->StartThread( + {}, "worker_thread", + [this, new_ctx, i]() { WorkerThread(new_ctx, i); })); if (i < dataset()->cycle_length_) { interleave_indices_.push_back(i); } else { @@ -1050,8 +1047,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { // The worker threads. This must be last to ensure the // threads have exited before any other members are deallocated. // TODO(b/65178177): Avoid allocating additional threads. 
- std::vector> worker_threads_ - GUARDED_BY(mu_); + std::vector> worker_threads_ GUARDED_BY(mu_); }; const DatasetBase* const input_; @@ -1393,10 +1389,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { std::shared_ptr new_ctx(new IteratorContext(*ctx)); - runner_thread_ = - MakeUnique(ctx->env(), "runner_thread"); - runner_thread_->Schedule( - [this, new_ctx]() { RunnerThread(new_ctx); }); + runner_thread_.reset(ctx->env()->StartThread( + {}, "runner_thread", + [this, new_ctx]() { RunnerThread(new_ctx); })); } } @@ -1650,7 +1645,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { int64 num_calls_ GUARDED_BY(*mu_) = 0; std::unique_ptr thread_pool_; - std::unique_ptr runner_thread_ GUARDED_BY(*mu_); + std::unique_ptr runner_thread_ GUARDED_BY(*mu_); // Identifies whether background activity should be cancelled. bool cancelled_ GUARDED_BY(*mu_) = false; diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc index e69274e4f2..ebf41925c9 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.cc +++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc @@ -181,10 +181,9 @@ class ParallelMapIterator : public DatasetBaseIterator { EXCLUSIVE_LOCKS_REQUIRED(*mu_) { if (!runner_thread_) { auto ctx_copy = std::make_shared(*ctx); - runner_thread_ = - MakeUnique(ctx->env(), "runner_thread"); - runner_thread_->Schedule( - std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)); + runner_thread_.reset(ctx->env()->StartThread( + {}, "runner_thread", + std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy))); } } @@ -332,7 +331,7 @@ class ParallelMapIterator : public DatasetBaseIterator { // Buffer for storing the invocation results. 
std::deque> invocation_results_ GUARDED_BY(*mu_); - std::unique_ptr runner_thread_ GUARDED_BY(*mu_); + std::unique_ptr runner_thread_ GUARDED_BY(*mu_); bool cancelled_ GUARDED_BY(*mu_) = false; }; diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc index e9c38eb8a0..754ed772db 100644 --- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc +++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { @@ -257,11 +256,10 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { Status EnsurePrefetchThreadStarted(IteratorContext* ctx) EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (!prefetch_thread_) { - prefetch_thread_ = - MakeUnique(ctx->env(), "prefetch_thread"); std::shared_ptr new_ctx(new IteratorContext(*ctx)); - prefetch_thread_->Schedule( - [this, new_ctx]() { PrefetchThread(new_ctx); }); + prefetch_thread_.reset(ctx->env()->StartThread( + {}, "prefetch_thread", + [this, new_ctx]() { PrefetchThread(new_ctx); })); } return Status::OK(); } @@ -365,7 +363,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { string prefix_end_; PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_); std::deque buffer_ GUARDED_BY(mu_); - std::unique_ptr prefetch_thread_ GUARDED_BY(mu_); + std::unique_ptr prefetch_thread_ GUARDED_BY(mu_); bool cancelled_ GUARDED_BY(mu_) = false; bool prefetch_thread_finished_ GUARDED_BY(mu_) = false; }; diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc index 7bb2077b62..3f76695bb1 100644 --- a/tensorflow/core/kernels/data/writer_ops.cc +++ b/tensorflow/core/kernels/data/writer_ops.cc @@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel { public: explicit 
ToTFRecordOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx), - background_worker_( - ctx->env(), - strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) { - } + thread_pool_(new thread::ThreadPool( + ctx->env(), ThreadOptions(), + strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())), + 1 /* num_threads */, false /* low_latency_hint */)) {} template Status ParseScalarArgument(OpKernelContext* ctx, @@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel { // The call to `iterator->GetNext()` may block and depend on an // inter-op thread pool thread, so we issue the call from the // owned thread pool. - background_worker_.Schedule([this, ctx, done]() { + thread_pool_->Schedule([this, ctx, done]() { string filename; OP_REQUIRES_OK_ASYNC( ctx, ParseScalarArgument(ctx, "filename", &filename), done); @@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel { } private: - BackgroundWorker background_worker_; + std::unique_ptr thread_pool_; }; REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU), -- GitLab From cb057ea64032e551027c8f9058a9d28a258c9d6b Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Mon, 8 Oct 2018 15:42:17 -0700 Subject: [PATCH 091/411] [XLA] Make overly-specific ShapeUtil predicate a little more general. 
PiperOrigin-RevId: 216263039 --- tensorflow/compiler/xla/service/hlo_instruction_test.cc | 3 ++- tensorflow/compiler/xla/service/hlo_query.cc | 2 +- tensorflow/compiler/xla/shape_util.cc | 5 +++-- tensorflow/compiler/xla/shape_util.h | 5 ++++- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index c1b7c3832b..d93351fe04 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -135,7 +135,8 @@ TEST_F(HloInstructionTest, BasicProperties) { auto parameter = HloInstruction::CreateParameter(1, r0f32_, "foo"); EXPECT_EQ(HloOpcode::kParameter, parameter->opcode()); - EXPECT_TRUE(ShapeUtil::IsScalarF32(parameter->shape())); + EXPECT_TRUE(ShapeUtil::IsScalarWithElementType(parameter->shape(), F32)); + EXPECT_FALSE(ShapeUtil::IsScalarWithElementType(parameter->shape(), S32)); EXPECT_EQ(0, parameter->operand_count()); } diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc index 2a07b6fcbc..2d5197be9e 100644 --- a/tensorflow/compiler/xla/service/hlo_query.cc +++ b/tensorflow/compiler/xla/service/hlo_query.cc @@ -24,7 +24,7 @@ namespace hlo_query { bool IsConstantR0F32(HloInstruction* instruction, float* out) { if (instruction->opcode() == HloOpcode::kConstant && - ShapeUtil::IsScalarF32(instruction->shape())) { + ShapeUtil::IsScalarWithElementType(instruction->shape(), F32)) { *out = instruction->literal().Get({}); return true; } diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 7f0201942b..9267de3cfc 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -461,8 +461,9 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( return ShapeUtil::IsArray(shape) && ElementsIn(shape) == 0; } -/* static */ bool 
ShapeUtil::IsScalarF32(const Shape& shape) { - return shape.element_type() == F32 && Rank(shape) == 0; +/* static */ bool ShapeUtil::IsScalarWithElementType( + const Shape& shape, PrimitiveType element_type) { + return IsScalar(shape) && shape.element_type() == element_type; } namespace { diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index d8bb27beae..73f541d505 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -312,7 +312,10 @@ class ShapeUtil { static bool IsEffectiveScalar(const Shape& shape) { return IsArray(shape) && TrueRank(shape) == 0; } - static bool IsScalarF32(const Shape& shape); + + // Returns whether "shape" is a scalar (array) with the given element_type. + static bool IsScalarWithElementType(const Shape& shape, + PrimitiveType element_type); // Extracts the size of the shape's dimension at dimension number // GetDimensionNumber(dimension_number). -- GitLab From 783627bf63cdfa467e7811f2bf8330555d66f313 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 15:55:18 -0700 Subject: [PATCH 092/411] Convert TensorFlow's aws dependency to new third party import method. 
PiperOrigin-RevId: 216265275 --- tensorflow/workspace.bzl | 14 +++----------- third_party/aws/BUILD | 1 + third_party/{aws.BUILD => aws/BUILD.bazel} | 0 third_party/aws/workspace.bzl | 15 +++++++++++++++ 4 files changed, 19 insertions(+), 11 deletions(-) create mode 100644 third_party/aws/BUILD rename third_party/{aws.BUILD => aws/BUILD.bazel} (100%) create mode 100644 third_party/aws/workspace.bzl diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index adeac62e43..40c226a861 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -20,12 +20,15 @@ load( "//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl", "def_file_filter_configure", ) +load("//third_party/aws:workspace.bzl", aws = "repo") load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") load("//third_party/icu:workspace.bzl", icu = "repo") load("//third_party/jpeg:workspace.bzl", jpeg = "repo") load("//third_party/nasm:workspace.bzl", nasm = "repo") def initialize_third_party(): + """ Load third party repositories. See above load() statements. """ + aws() flatbuffers() icu() jpeg() @@ -585,17 +588,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): ], ) - tf_http_archive( - name = "aws", - build_file = clean_dep("//third_party:aws.BUILD"), - sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c", - strip_prefix = "aws-sdk-cpp-1.3.15", - urls = [ - "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz", - "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz", - ], - ) - java_import_external( name = "junit", jar_sha256 = "59721f0805e223d84b90677887d9ff567dc534d7c502ca903c0c2b17f05c116a", diff --git a/third_party/aws/BUILD b/third_party/aws/BUILD new file mode 100644 index 0000000000..2f5d02becb --- /dev/null +++ b/third_party/aws/BUILD @@ -0,0 +1 @@ +# Dummy BUILD file to make this directory a package. 
diff --git a/third_party/aws.BUILD b/third_party/aws/BUILD.bazel similarity index 100% rename from third_party/aws.BUILD rename to third_party/aws/BUILD.bazel diff --git a/third_party/aws/workspace.bzl b/third_party/aws/workspace.bzl new file mode 100644 index 0000000000..c216638154 --- /dev/null +++ b/third_party/aws/workspace.bzl @@ -0,0 +1,15 @@ +"""loads the aws library, used by TF.""" + +load("//third_party:repo.bzl", "third_party_http_archive") + +def repo(): + third_party_http_archive( + name = "aws", + urls = [ + "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz", + "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz", + ], + sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c", + strip_prefix = "aws-sdk-cpp-1.3.15", + build_file = "//third_party/aws:BUILD.bazel", + ) -- GitLab From 46d296b2d03ddbb6f0723d213fdfa9c5226e1e2a Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Mon, 8 Oct 2018 16:24:49 -0700 Subject: [PATCH 093/411] Internal change PiperOrigin-RevId: 216270385 --- tensorflow/contrib/lite/build_def.bzl | 40 +++++++++++++++++++++++---- tensorflow/contrib/lite/testing/BUILD | 4 +-- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 7ef26de69f..b9e933a8b6 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -212,7 +212,8 @@ def json_to_tflite(name, src, out): # This is the master list of generated examples that will be made into tests. A # function called make_XXX_tests() must also appear in generate_examples.py. -# Disable a test by commenting it out. If you do, add a link to a bug or issue. +# Disable a test by adding it to the blacklists specified in +# generated_test_models_failing(). 
def generated_test_models(): return [ "add", @@ -291,12 +292,38 @@ def generated_test_models(): "tile", "topk", "transpose", - #"transpose_conv", # disabled due to b/111213074 + "transpose_conv", "unpack", "where", "zeros_like", ] +# List of models that fail generated tests for the conversion mode. +# If you have to disable a test, please add here with a link to the appropriate +# bug or issue. +def generated_test_models_failing(conversion_mode): + if not conversion_mode: + return [ + "transpose_conv", # disabled due to b/111213074 + ] + + if conversion_mode == "toco-flex": + # TODO(b/117328698): Fix and enable the known flex failures. + return [ + "arg_min_max", + "div", + "floor_div", + "gather ", + "lstm ", + "resize_bilinear", + "space_to_batch_nd", + "split", + "transpose", + "unpack", + ] + + return [] + def generated_test_conversion_modes(): """Returns a list of conversion modes.""" @@ -313,10 +340,14 @@ def generated_test_models_all(): tests = generated_test_models() options = [] for conversion_mode in conversion_modes: + failing_tests = generated_test_models_failing(conversion_mode) for test in tests: + tags = [] + if test in failing_tests: + tags.append("notap") if conversion_mode: test += "_%s" % conversion_mode - options.append((conversion_mode, test)) + options.append((conversion_mode, test, tags)) return options def gen_zip_test(name, test_name, conversion_mode, **kwargs): @@ -336,9 +367,6 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs): # if conversion_mode == "pb2lite": # toco = "//tensorflow/contrib/lite/experimental/pb2lite:pb2lite" flags = "--ignore_toco_errors --run_with_flex" - kwargs["tags"].append("skip_already_failing") - kwargs["tags"].append("no_oss") - kwargs["tags"].append("notap") gen_zipped_test_file( name = "zip_%s" % test_name, diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index f0bfec2338..45baad782a 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ 
b/tensorflow/contrib/lite/testing/BUILD @@ -35,7 +35,7 @@ load( ":zip_%s" % test_name, ], shard_count = 20, - tags = [ + tags = tags + [ "gen_zip_test", "no_oss", "tflite_not_portable_intentional", @@ -61,7 +61,7 @@ load( "//tensorflow/core:android_tensorflow_test_lib", ], }), -) for conversion_mode, test_name in generated_test_models_all()] +) for conversion_mode, test_name, tags in generated_test_models_all()] test_suite( name = "generated_zip_tests", -- GitLab From 8815f34385eb28f1cfcb53bebd526c11573f3027 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 8 Oct 2018 16:25:40 -0700 Subject: [PATCH 094/411] Avoid calling get_default_graph() during tf.enable_eager_execution() PiperOrigin-RevId: 216270497 --- tensorflow/python/framework/ops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 77c2bc930e..140bd098a6 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5457,8 +5457,7 @@ def enable_eager_execution_internal(config=None, "tf.contrib.eager.ASYNC") if context.default_execution_mode == context.GRAPH_MODE: graph_mode_has_been_used = ( - _default_session_stack.stack - or len(get_default_graph().get_operations()) > 0) # pylint: disable=g-explicit-length-test + _default_graph_stack._global_default_graph is not None) # pylint: disable=protected-access if graph_mode_has_been_used: raise ValueError( "tf.enable_eager_execution must be called at program startup.") -- GitLab From 49643265c3f1f279a93bd8bc3a126e11e979bc44 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 8 Oct 2018 17:14:47 -0700 Subject: [PATCH 095/411] Remove deprecations for some of the endpoints in ApiDef files. These changes are made according to https://github.com/tensorflow/community/pull/16. 
I am keeping a few symbols deprecated not mentioned in the doc: tf.diag - it seems best to keep it next to tf.linalg.diag, so that the two are easy to compare and decide which one to use. The plan is to rename tf.diag to tf.tensor_diag. tf.is_nan - similar to tf.is_inf, tf.is_finite, tf.is_numeric_tensor which are all getting deprecated and replaced by symbols in tf.debugging. tf.string_to_number - other string endpoints in root namespace are getting deprecated: for e.g. tf.substr, tf.string_join. tf.dequantize - all quantization ops should be under tf.quantize. I probably missed this one. tf.check_numerics - similar to other debugging ops that are getting moved to tf.debugging. tf.squared_difference - moved to tf.math namespace and not as popular as some other math ops such as tf.add to justify keeping endpoint in root. tf.decode_raw - similar to other ops such as tf.decode_csv that are getting moved to tf.io.decode_csv. PiperOrigin-RevId: 216278010 --- tensorflow/core/api_def/python_api/api_def_Acos.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Add.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_AsString.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Asin.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Atan.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Cos.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Equal.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Exp.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Floor.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Greater.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt | 1 - 
tensorflow/core/api_def/python_api/api_def_Less.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Log.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Sin.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt | 1 - tensorflow/core/api_def/python_api/api_def_Tan.pbtxt | 1 - 29 files changed, 29 deletions(-) diff --git a/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt b/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt index 1fd8baf05f..f4d7f498b2 100644 --- a/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "acos" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt index f7946652ef..e921f26d1e 100644 --- a/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "acosh" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Add.pbtxt b/tensorflow/core/api_def/python_api/api_def_Add.pbtxt index fb505a91ac..4c6f387ebd 100644 --- a/tensorflow/core/api_def/python_api/api_def_Add.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Add.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "add" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt index ea65543a76..d51defc376 100644 --- a/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "as_string" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt b/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt index eedf4553c6..b13f5c398f 100644 --- a/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "asin" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt index 10c2fb356e..89a3f9da44 100644 --- a/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "asinh" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt index 03dd5dc848..4403a2379c 100644 --- a/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "atan" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt index 85b27bd881..56eed0f0fb 100644 --- a/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "atan2" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt index ee7c0600d6..a8f5e792f0 100644 --- a/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt @@ -5,6 
+5,5 @@ op { } endpoint { name: "atanh" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt index 1af8c0c2c9..db52d25ff2 100644 --- a/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "cos" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt index 2de87df40d..74bf573565 100644 --- a/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "cosh" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt b/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt index 78aa1b3bc5..34717e74bc 100644 --- a/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "equal" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt b/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt index 70323fe5b4..38a9078d9f 100644 --- a/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "exp" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt b/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt index 9b93caa0b1..14accd2b20 100644 --- a/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "floor" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt b/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt index 7de60d44c4..7926deaa3b 100644 --- 
a/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "greater" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt index 9c8975c2a9..21bbb1b094 100644 --- a/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "greater_equal" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Less.pbtxt b/tensorflow/core/api_def/python_api/api_def_Less.pbtxt index 055df2922a..0b5f06e99f 100644 --- a/tensorflow/core/api_def/python_api/api_def_Less.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Less.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "less" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt index d2803ddb69..afc4f2a8c9 100644 --- a/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "less_equal" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Log.pbtxt b/tensorflow/core/api_def/python_api/api_def_Log.pbtxt index 26d2473b9c..ac4a4454c7 100644 --- a/tensorflow/core/api_def/python_api/api_def_Log.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Log.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "log" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt b/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt index d85b6dccec..5a2d77a417 100644 --- a/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "log1p" - deprecated: 
true } } diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt index 80bd98b740..d4e6a7a380 100644 --- a/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "logical_and" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt index b2244c44b1..49068738a4 100644 --- a/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "logical_not" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt index cf78b52e07..a5133962dc 100644 --- a/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "logical_or" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt b/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt index bcff379b71..130729ece1 100644 --- a/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "maximum" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt b/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt index 9aae74226a..8aded1f154 100644 --- a/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "minimum" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt index f37317854f..07fe3b6af1 100644 --- a/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "not_equal" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt b/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt index 9c19a1a177..a2b776ee0c 100644 --- a/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "sin" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt index 155e58e6d5..38c7c729bf 100644 --- a/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "sinh" - deprecated: true } } diff --git a/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt b/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt index ffa92f5580..20cfac05fd 100644 --- a/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt @@ -5,6 +5,5 @@ op { } endpoint { name: "tan" - deprecated: true } } -- GitLab From 03d097bc96080981098ffdbaf1b3465e6e153a6a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 8 Oct 2018 17:33:22 -0700 Subject: [PATCH 096/411] Consolidate device parameter arguments into a shared DeviceInfo struct PiperOrigin-RevId: 216280197 --- tensorflow/core/grappler/costs/cost_estimator.h | 5 +++++ tensorflow/core/grappler/costs/op_level_cost_estimator.cc | 2 +- tensorflow/core/grappler/costs/op_level_cost_estimator.h | 6 ------ tensorflow/python/grappler/cluster.i | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h index e91f0cc9da..569d9da683 100644 --- a/tensorflow/core/grappler/costs/cost_estimator.h +++ b/tensorflow/core/grappler/costs/cost_estimator.h @@ -30,6 +30,11 @@ struct GrapplerItem; constexpr int64 kMemoryUnknown = -1ll; constexpr int64 kZeroMemory = 0ll; +struct DeviceInfo { + double gigaops; // Billions of operations executed per second. + double gb_per_sec; // Bandwidth to main memory in GB per second. +}; + // Holds the set of things we might want to estimate or measure in Grappler. // Always produce execution time. Other fields are optional depending on the // estimator being used. 
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 71f4d9fd05..f363f2915f 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -372,7 +372,7 @@ Costs OpLevelCostEstimator::PredictCosts(const OpContext& op_context) const { return costs; } -OpLevelCostEstimator::DeviceInfo OpLevelCostEstimator::GetDeviceInfo( +DeviceInfo OpLevelCostEstimator::GetDeviceInfo( const DeviceProperties& device) const { double gflops = -1; double gb_per_sec = -1; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index a277dfdf65..dd1ee39cb2 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -40,12 +40,6 @@ class OpLevelCostEstimator { virtual Costs PredictCosts(const OpContext& op_context) const; - // Basic device performance info, sufficient for roofline estimate. - struct DeviceInfo { - double gigaops; // Billions of operations executed per second. - double gb_per_sec; // Bandwidth to main memory in GB per second. - }; - // Returns basic device performance info. 
virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const; diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i index 6816e20407..87795ffcfb 100644 --- a/tensorflow/python/grappler/cluster.i +++ b/tensorflow/python/grappler/cluster.i @@ -308,7 +308,7 @@ static PyObject* TF_GetSupportedDevices(GCluster cluster, GItem item) { static double TF_EstimatePerformance(const tensorflow::NamedDevice& device) { tensorflow::grappler::OpLevelCostEstimator estimator; - tensorflow::grappler::OpLevelCostEstimator::DeviceInfo info = + tensorflow::grappler::DeviceInfo info = estimator.GetDeviceInfo(device.properties()); return info.gigaops; } -- GitLab From 4ff7b81514ea1b86295bc74b620e3c1d3e127e6f Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 8 Oct 2018 17:37:44 -0700 Subject: [PATCH 097/411] Fix the seeding for `Dataset.shuffle(..., reshuffle_each_iteration=False)`. Previously, we were passing the first (graph-level) seed for both the graph-level and op-level seeds when creating a C++ dataset. This change passes the op-level seed to the appropriate point, and adds a test for the behavior with graph-but-not-op-level seeds. 
PiperOrigin-RevId: 216280641 --- .../core/kernels/data/shuffle_dataset_op.cc | 2 +- tensorflow/python/data/kernel_tests/BUILD | 3 ++ .../kernel_tests/shuffle_dataset_op_test.py | 35 ++++++++++++++++++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 66466d6a36..9f54c381a9 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase { int64 buffer_size, int64 seed, int64 seed2, int64 count) : ShuffleDatasetBase(ctx, input, buffer_size, count), seed_(seed), - seed2_(seed) {} + seed2_(seed2) {} string DebugString() const override { return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_, diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index c7295d6e69..671b7ca1bb 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -443,12 +443,15 @@ tf_py_test( srcs = ["shuffle_dataset_op_test.py"], additional_deps = [ ":test_base", + "@absl_py//absl/testing:parameterized", "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:random_seed", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/ops:iterator_ops", ], diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py index 347af18576..8694f58a24 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import collections +from 
absl.testing import parameterized import numpy as np from tensorflow.python.data.kernel_tests import test_base @@ -27,11 +28,13 @@ from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.platform import test -class ShuffleDatasetTest(test_base.DatasetTestBase): +class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): def testShuffleDataset(self): components = ( @@ -209,5 +212,35 @@ class ShuffleDatasetTest(test_base.DatasetTestBase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + @parameterized.named_parameters( + ("ReshuffleGraphLevelSeed", True, 38, None), + ("ReshuffleOpLevelSeed", True, None, 42), + ("ReshuffleGraphAndOpLevelSeed", True, 38, 42), + ("NoReshuffleGraphLevelSeed", False, 38, None), + ("NoReshuffleOpLevelSeed", False, None, 42), + ("NoReshuffleGraphAndOpLevelSeed", False, 38, 42), + ) + def testShuffleSeed(self, reshuffle, graph_level_seed, op_level_seed): + results = [] + for _ in range(2): + with ops.Graph().as_default() as g: + random_seed.set_random_seed(graph_level_seed) + dataset = dataset_ops.Dataset.range(10).shuffle( + 10, seed=op_level_seed, reshuffle_each_iteration=reshuffle).repeat( + 3) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + run_results = [] + with self.session(graph=g) as sess: + for _ in range(30): + run_results.append(sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + results.append(run_results) + + self.assertAllEqual(results[0], results[1]) + + if __name__ == "__main__": test.main() -- GitLab From 934fde5b8c60987db36438ab4f70f8a91bce306b Mon Sep 17 00:00:00 2001 From: James Qin Date: 
Mon, 8 Oct 2018 17:40:07 -0700 Subject: [PATCH 098/411] Register int64 SUM GPU kernel. PiperOrigin-RevId: 216280913 --- tensorflow/core/kernels/reduction_ops_sum.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc index 5318d8c133..cf0d0f5c71 100644 --- a/tensorflow/core/kernels/reduction_ops_sum.cc +++ b/tensorflow/core/kernels/reduction_ops_sum.cc @@ -51,6 +51,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS); .HostMemory("reduction_indices"), \ ReductionOp>); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); TF_CALL_complex64(REGISTER_GPU_KERNELS); TF_CALL_complex128(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS -- GitLab From d58712b7fc8de0e1f87fe2ea5221bc3c85230ed3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 18:12:42 -0700 Subject: [PATCH 099/411] Add a tracing::ScopedActivity event to track the duration of a Session::Run() call for better xprof tracing. Also annotate synchronous op execution with the session-run id (or step_id) as metadata leveraging the support introduced in cl/215985561. This should enable highlighting the duration of a Session::Run and all the ops that ran in it for visualizing latency regressions in the case of CPU inference. PiperOrigin-RevId: 216284682 --- tensorflow/core/common_runtime/direct_session.cc | 4 ++++ tensorflow/core/common_runtime/executor.cc | 12 ++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 458e133b68..52c1cd2691 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -64,6 +64,7 @@ limitations under the License. 
#include "tensorflow/core/platform/device_tracer.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/env_var.h" @@ -453,6 +454,9 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options, CallFrameInterface* call_frame, ExecutorsAndKeys* executors_and_keys, RunMetadata* run_metadata) { + string session_id_meta = strings::StrCat("SessionRun #id=", step_id, "#"); + tracing::ScopedActivity activity(session_id_meta); + const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1); std::unique_ptr debugger_state; diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 40ec1502da..eb69d1991c 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -1771,14 +1771,18 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { // The OpKernel may create child activities (such as GPU kernel // launches), so use a `ScopedAnnotation` to relate these activities // in the trace. - tracing::ScopedAnnotation activity(op_name, - op_kernel->type_string()); + tracing::ScopedAnnotation activity( + op_name, strings::StrCat(op_kernel->type_string(), + "#id=", step_id_, "#")); device->Compute(op_kernel, &ctx); } else { // Use the cheaper `ScopedActivity` to trace just the OpKernel // execution. 
- tracing::ScopedActivity activity(op_name, op_kernel->type_string(), - item.kernel_is_expensive); + tracing::ScopedActivity activity( + op_name, + strings::StrCat(op_kernel->type_string(), "#id=", step_id_, + "#"), + item.kernel_is_expensive); device->Compute(op_kernel, &ctx); } } else { -- GitLab From 3a0434e6ff6bc8c68dd15933e005352f4cdf9a6e Mon Sep 17 00:00:00 2001 From: "Li, Yiqiang" Date: Tue, 9 Oct 2018 09:58:50 +0800 Subject: [PATCH 100/411] Fix bug in MklSlice op when allocating output tensor. Wrongly "+1" for output shape, that will cause CopyFrom failure in MklToTf op because of tensor size and shape mismatch. --- tensorflow/core/kernels/mkl_slice_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc index d63e14adf6..85cabeb92b 100644 --- a/tensorflow/core/kernels/mkl_slice_op.cc +++ b/tensorflow/core/kernels/mkl_slice_op.cc @@ -327,7 +327,7 @@ class MklDnnSliceOp : public OpKernel { output_mkl_shape->SetTfLayout(input_mkl_shape.GetDimension(), output_dims, input_mkl_shape.GetTfDataFormat()); - output_tf_shape.AddDim((output_pd->get_size() / sizeof(T)) + 1); + output_tf_shape.AddDim(output_pd->get_size() / sizeof(T)); } else { // If input is not in Mkl layout, then output won't be in Mkl layout. output_mkl_shape->SetMklTensor(false); -- GitLab From 375c109659d2d0e6265447dffdeb460693b3cccf Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 8 Oct 2018 21:18:36 -0700 Subject: [PATCH 101/411] [XLA] Introduce input/output alias config. - This CL introduces input/output alias config in HLO module that allows any HLO pass to configure it. Once the alias_config is set, each backend needs to follow the contract during execution time to make sure the input and output are indeed aliased. - Copy insertion / buffer assignment and alias analysis has been updated to correctly honor the config and avoid any possible liveness interference.
PiperOrigin-RevId: 216299501 --- tensorflow/compiler/xla/service/BUILD | 21 ++ .../compiler/xla/service/buffer_assignment.cc | 34 ++-- .../compiler/xla/service/buffer_value.h | 3 + .../compiler/xla/service/copy_insertion.cc | 85 +++++++- .../xla/service/copy_insertion_test.cc | 183 +++++++++++++++++ tensorflow/compiler/xla/service/hlo.proto | 29 +++ .../xla/service/hlo_alias_analysis.cc | 46 ++++- .../xla/service/hlo_alias_analysis_test.cc | 175 +++++++++++++++++ .../xla/service/hlo_dataflow_analysis.cc | 2 +- .../service/hlo_input_output_alias_config.cc | 172 ++++++++++++++++ .../service/hlo_input_output_alias_config.h | 101 ++++++++++ .../hlo_input_output_alias_config_test.cc | 184 ++++++++++++++++++ tensorflow/compiler/xla/service/hlo_module.cc | 9 + tensorflow/compiler/xla/service/hlo_module.h | 14 ++ .../compiler/xla/service/hlo_verifier.cc | 2 + tensorflow/compiler/xla/shape_util.h | 2 +- 16 files changed, 1037 insertions(+), 25 deletions(-) create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.h create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 2b292ed053..26ebb88e96 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -294,6 +294,7 @@ cc_library( srcs = [ "dfs_hlo_visitor.cc", "hlo_computation.cc", + "hlo_input_output_alias_config.cc", "hlo_instruction.cc", "hlo_instructions.cc", "hlo_module.cc", @@ -308,6 +309,7 @@ cc_library( "hlo_clone_context.h", "hlo_computation.h", "hlo_domain_metadata.h", + "hlo_input_output_alias_config.h", "hlo_instruction.h", "hlo_instructions.h", "hlo_module.h", @@ -1268,6 +1270,25 @@ tf_cc_test( ], ) +tf_cc_test( + name = "hlo_input_output_alias_config_test", + srcs = ["hlo_input_output_alias_config_test.cc"], + deps = [ + ":hlo", + 
":hlo_dce", + ":hlo_memory_scheduler", + ":hlo_ordering", + ":hlo_parser", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + "@com_google_absl//absl/algorithm:container", + ], +) + cc_library( name = "hlo_memory_scheduler", srcs = ["hlo_memory_scheduler.cc"], diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 2c2d1626c2..d5d6a044a8 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -239,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice( void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset, int64 size) { - VLOG(4) << "Trying to add " << buffer << " to " << this; + VLOG(4) << "Trying to add " << buffer << " to allocation #" << index(); CHECK(assigned_buffers_.count(&buffer) == 0) << "LogicalBuffer " << buffer << " already assigned to allocation " << index_; @@ -784,21 +784,6 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, } } - if (allow_input_output_aliasing_ && allocation->maybe_live_out()) { - const HloComputation* entry_computation = - assignment->module_->entry_computation(); - for (auto param : entry_computation->parameter_instructions()) { - for (auto& param_buffer : - assignment->points_to_analysis().GetBuffersDefinedByInstruction( - param)) { - if (assignment->liveness().MayInterfere(*param_buffer, buffer)) { - VLOG(4) << "Can't assign: Parameter interference with result"; - return false; - } - } - } - } - // If the buffer is live out of the computation then it should only be // assigned a buffer which exactly fits the result to avoid wasting memory // (result buffers can have arbitrary lifetimes). 
@@ -1434,13 +1419,28 @@ BufferAssigner::MergeColocatedBufferSets( // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated // in the same allocation (currently just supports kWhile, kCall, and -// kConditional). +// kConditional and input output aliasing). void BufferAssigner::BuildColocatedBufferSets( const HloModule* module, const BufferLiveness& buffer_liveness, const LogicalBuffer::SizeFunction& buffer_size, std::vector* colocated_buffer_sets) { const TuplePointsToAnalysis& points_to_analysis = buffer_liveness.points_to_analysis(); + + // Set up colocated buffer set for input and output. + module->input_output_alias_config().ForEachAlias( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + std::vector colocated_set; + AddBufferToColocatedSet(module->entry_computation()->root_instruction(), + output_index, points_to_analysis, + &colocated_set); + AddBufferToColocatedSet( + module->entry_computation()->parameter_instruction(param_number), + param_index, points_to_analysis, &colocated_set); + AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets); + }); + for (const HloComputation* computation : module->MakeComputationPostOrder()) { if (computation->IsFusionComputation()) { continue; diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h index 69b3646356..11d8abc5ba 100644 --- a/tensorflow/compiler/xla/service/buffer_value.h +++ b/tensorflow/compiler/xla/service/buffer_value.h @@ -141,6 +141,9 @@ class BufferValue { // operator< is required for std::set. bool operator<(const BufferValue& other) const { return id_ < other.id_; } + bool operator==(const BufferValue& other) const { return id_ == other.id_; } + bool operator!=(const BufferValue& other) const { return id_ != other.id_; } + virtual string ToString() const = 0; // TODO(lauj) rename LogicalBufferProto to BufferValueProto. 
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index f35324aa35..cfe025fdd1 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -40,10 +40,12 @@ namespace { using absl::StrAppend; -bool IsEntryParameterValue(const HloValue& value) { +bool IsReadonlyEntryParameterValue(const HloValue& value) { const HloComputation* computation = value.defining_instruction()->parent(); return value.defining_instruction()->opcode() == HloOpcode::kParameter && - computation == computation->parent()->entry_computation(); + computation == computation->parent()->entry_computation() && + !computation->parent()->input_output_alias_config().ParameterHasAlias( + value.defining_instruction()->parameter_number()); } bool IsConstantValue(const HloValue& value) { @@ -51,7 +53,7 @@ bool IsConstantValue(const HloValue& value) { } bool ValueIsReadOnly(const HloValue& value) { - return IsConstantValue(value) || IsEntryParameterValue(value); + return IsConstantValue(value) || IsReadonlyEntryParameterValue(value); } // Data structure describing the action which should be taken on parts of a @@ -332,6 +334,81 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis, return Status::OK(); } +// Conservatively adds copies before root instruction of entry computation and +// each aliased parameter to resolve interference of aliased input and output +// buffer. We later rely on the CopyRemover to drop the unnecessary ones. 
+Status AddCopiesForAliasedInputOutputs(HloModule* module) { + HloComputation* entry = module->entry_computation(); + HloInstruction* root = entry->root_instruction(); + + ShapeTree<bool> output_indices_to_copy(root->shape()); + std::vector<ShapeTree<HloInstruction*>> copied_parameters; + bool has_alias = false; + for (auto* param : entry->parameter_instructions()) { + bool param_has_alias = false; + ShapeTree<bool> param_indices_to_copy(param->shape()); + + module->input_output_alias_config().ForEachAlias( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + if (param_number == param->parameter_number()) { + param_has_alias = true; + *(param_indices_to_copy.mutable_element(param_index)) = true; + *(output_indices_to_copy.mutable_element(output_index)) = true; + } + }); + + if (!param_has_alias) { + continue; + } + + has_alias = true; + // Store a snapshot of users before DeepCopyInstruction, as + // DeepCopyInstruction introduces new users of the instruction. + std::vector<HloInstruction*> users = param->users(); + ShapeTree<HloInstruction*> param_copy_tree(param->shape(), + /*init_value=*/nullptr); + TF_ASSIGN_OR_RETURN(HloInstruction * copied, + entry->DeepCopyInstruction( + param, &param_indices_to_copy, &param_copy_tree)); + for (HloInstruction* user : users) { + TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied)); + } + + copied_parameters.push_back(param_copy_tree); + } + + if (!has_alias) { + return Status::OK(); + } + + // Add copies before root instruction. + ShapeTree<HloInstruction*> output_copy_tree(root->shape(), + /*init_value=*/nullptr); + + TF_ASSIGN_OR_RETURN(HloInstruction * root_copied, + root->parent()->DeepCopyInstruction( + root, &output_indices_to_copy, &output_copy_tree)); + + // Add control dependencies between the input/output copies.
+ TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& input_index) -> Status { + HloInstruction* from = + copied_parameters[param_number].element(input_index); + HloInstruction* to = output_copy_tree.element(output_index); + + TF_RET_CHECK(from != nullptr); + TF_RET_CHECK(to != nullptr); + TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to)); + return Status::OK(); + })); + + entry->set_root_instruction(root_copied); + + return Status::OK(); +} + // Removes any control dependencies to or from the given instruction. Status StripControlDependenciesFrom(HloInstruction* instruction) { while (!instruction->control_successors().empty()) { @@ -953,6 +1030,8 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) { } } } + + TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module)); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index 892d0d7b54..3096206c34 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -1351,6 +1351,189 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) { EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy())); } +TEST_F(CopyInsertionTest, CrossingParameters) { + // Test a case where two parameters' dataflow cross with each other while + // input and output are aliased with same index: + // + // (p0 , p1) + // | \ /| + // | \ / | + // alias X alias + // | / \ | + // | / \| + // (p1 , p0) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "0")); + auto gte0 = builder.AddInstruction( + 
HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 4); +} + +TEST_F(CopyInsertionTest, ParametersAliasing) { + // Test a case where two parameters' dataflow don't interfere with each other + // while aliased. + // + // (p0 , p1) + // | | + // | | + // alias alias + // | | + // | | + // (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + InsertCopies(module.get()); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(op::GetTupleElement(param, 0)), + op::Copy(op::GetTupleElement(param, 1)))); + + EXPECT_EQ(CountCopies(*module), 2); +} + +TEST_F(CopyInsertionTest, 
ParameterWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. + // + // (p0 , p1) + // | | + // | | + // alias | + // | | + // | | + // (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(op::GetTupleElement(param, 0)), + op::Copy(op::GetTupleElement(param, 1)))); + + EXPECT_EQ(CountCopies(*module), 2); +} + +TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. 
+ // + // +-- (p0 , p1) + // | | | + // | | | + // alias Negate Negate + // | | | + // | | | + // +-- (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 0); +} + +TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. 
+ // + // +-- (p0 , p1) + // | | | + // | | | + // alias Negate Negate + // | | | + // | Add----+ + // | | | + // +-- (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape_, HloOpcode::kAdd, negate0, negate1)); + builder.AddInstruction(HloInstruction::CreateTuple({add, negate1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 0); +} + TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) { // Test a while instruction with a body which permutes its tuple parameter // elements and applies one operation to one of the elements. 
The addition of diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index a0eb9e6ddc..82c8fb1904 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -225,6 +225,32 @@ message HloScheduleProto { map sequences = 1; } +message HloInputOutputAliasProto { + // The following proto describes a pair of an aliased input + // (described by parameter number and a ShapeIndex of the parameter) + // and an output (described by a ShapeIndex of the root + // instruction). For example: + // + // entry = { + // output_shape_index={1}, + // parameter_number=0, + // parameter_shape_index={1, 2}, + // } + // + // This entry indicates that the first parameter's {1, 2} element is + // aliased with the {1} element of the root instruction. + message AliasEntryProto { + // ShapeIndex of the root hlo. + repeated int64 output_shape_index = 1; + // Number of the parameter in entry computation. + int64 parameter_number = 2; + // ShapeIndex of the parameter instruction. + repeated int64 parameter_shape_index = 3; + } + + repeated AliasEntryProto entries = 1; +} + // Serialization of HloModule. message HloModuleProto { string name = 1; @@ -243,6 +269,9 @@ message HloModuleProto { // The schedule for this module. HloScheduleProto schedule = 7; + + // Describes alias information between inputs and outputs. + HloInputOutputAliasProto input_output_alias = 8; } // Serialization of LogicalBuffer. diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index c3da12e273..cf8e6594cb 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -59,8 +59,9 @@ class BufferValueMap { // construction process.
using BufferNumber = int64; - explicit BufferValueMap(const HloDataflowAnalysis& dataflow) - : dataflow_(dataflow) { + explicit BufferValueMap(HloModule* module, + const HloDataflowAnalysis& dataflow) + : module_(module), dataflow_(dataflow) { buffers_.reserve(dataflow_.values().size()); value_to_buffer_number_.reserve(dataflow_.values().size()); for (const HloValue* value : dataflow_.values()) { @@ -171,6 +172,42 @@ class BufferValueMap { return value_to_buffer_number_.at(&value); } + void ComputeInputOutputAliasedBuffers( + const HloValue& value, std::vector* aliased_buffers) { + // Get parameter value from an aliased_input object. + const auto get_parameter_value = + [this](const std::pair& aliased_input) + -> const HloValue& { + int64 param_number = aliased_input.first; + const ShapeIndex& param_index = aliased_input.second; + return dataflow_.GetUniqueValueAt( + module_->entry_computation()->parameter_instruction(param_number), + param_index); + }; + + // If the value shows up in a root instruction, alias it with parameter + // instruction. + for (const HloPosition& pos : value.positions()) { + if (pos.instruction == module_->entry_computation()->root_instruction()) { + ShapeIndex output_index = pos.index; + + auto aliased_input = + module_->input_output_alias_config().GetAliasedParameter( + output_index); + if (aliased_input) { + aliased_buffers->push_back( + GetBufferForValue(get_parameter_value(*aliased_input))); + } + } + } + + // If the value is parameter instruction itself, alias it with itself.
+ if (value.instruction()->opcode() == HloOpcode::kParameter && + value.instruction()->parent() == module_->entry_computation()) { + aliased_buffers->push_back(GetBufferForValue(value)); + } + } + void ComputeWhileAliasedBuffers(const HloValue& value, std::vector* aliased_buffers) { VLOG(3) << "Compute kWhile aliases"; @@ -278,6 +315,7 @@ class BufferValueMap { VLOG(2) << "Use of value " << value.ToShortString() << ": " << use; } std::vector aliased_buffers; + ComputeInputOutputAliasedBuffers(value, &aliased_buffers); ComputeWhileAliasedBuffers(value, &aliased_buffers); ComputeConditionalAliasedBuffers(value, &aliased_buffers); // Uniquify aliased buffers. @@ -288,6 +326,8 @@ class BufferValueMap { return aliased_buffers; } + HloModule* module_; + // Dataflow analysis used to construct the buffer map. const HloDataflowAnalysis& dataflow_; @@ -461,7 +501,7 @@ StatusOr> HloAliasAnalysis::Run( /*bitcast_defines_value=*/false, fusion_can_share_buffer)); - BufferValueMap buffer_map(alias_analysis->dataflow_analysis()); + BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis()); buffer_map.MergeAliasedBuffers(); // Create a vector of HloBuffers, one for each set of values in the diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc index 0cd0ab36fc..5c8d97b2d1 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc @@ -217,6 +217,181 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) { EXPECT_FALSE(AnyValuesInSameBufferInterfere()); } +TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) { + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + 
HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + // Cannot alias an output twice. + ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); +} + +TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) { + // parameter 0 aliased with output 1 and parameter 1 aliased with output 0. 
+ // + // (p0 , p1) + // \ / + // \ / + // alias X + // / \ + // / \ + // (p0 , p1) + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0})); + + // Cannot alias an output twice. + ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + // Every Ops in this graph are aliased with each other. + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); +} + +TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) { + // Test a simple single while instruction can be aliased with input and output + // of the computation. 
+ // + // body((F32[], F32[]) %tuple_param): + // %add = Add(%tuple_param{0}, %tuple_param{1}) + // return Tuple(%tuple_param{0}, %add) + // + // condition((F32[], F32[]) %tuple_param): + // return Constant(false) + // + // entry: + // %param1 = param1 + // %while = While(%param1, body, condition) + // %while_1 = GTE(%while, 0) + // %while_2 = GTE(%while, 1) + // %negate_1 = Negate(%while_1) + // %negate_2 = Negate(%while_2) + // return Tuple(negate_1, negate_2) + // + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + // Element 0 passes transparently through the body. + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "param")); + auto body_element_0 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0)); + auto body_element_1 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1)); + auto add = body_builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1)); + auto body_tuple = body_builder.AddInstruction( + HloInstruction::CreateTuple({body_element_0, add})); + HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); + + // Condition computation trivially returns a constant "false". 
+ auto cond_builder = HloComputation::Builder("condition"); + auto cond_param = cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "param")); + cond_builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(false))); + HloComputation* condition = + module_->AddEmbeddedComputation(cond_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(tuple_shape, condition, body, param)); + auto while_element_1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0)); + auto while_element_2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1)); + auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape_, HloOpcode::kNegate, while_element_1)); + auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape_, HloOpcode::kNegate, while_element_2)); + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + EXPECT_THAT( + GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})), + UnorderedElementsAre(GetValueDefinedAt(param, {1}), + GetValueDefinedAt(xla_while, /*index=*/{1}), + GetValueDefinedAt(body_param, {1}), + GetValueDefinedAt(cond_param, {1}), + GetValueDefinedAt(add), + GetValueDefinedAt(negate_2))); + + EXPECT_THAT( + analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(), + 
UnorderedElementsAre( + HloPosition{param, {1}}, HloPosition{xla_while, {1}}, + HloPosition{while_element_2, {}}, HloPosition{body_param, {1}}, + HloPosition{body_element_1, {}}, HloPosition{add, {}}, + HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}}, + HloPosition{cond_param, {1}}, HloPosition{negate_2, {}})); + + EXPECT_FALSE(AnyValuesInSameBufferInterfere()); +} + TEST_F(HloAliasAnalysisTest, SingleCall) { // Test a single call of a subcomputation. The subcomputation adds its two // array-shaped parameters. diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index c22adcdd8d..f401eac016 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -126,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction, const HloValue& HloDataflowAnalysis::GetValueDefinedAt( const HloInstruction* instruction, const ShapeIndex& index) const { - CHECK(ValueIsDefinedAt(instruction, index)); + CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString(); return GetUniqueValueAt(instruction, index); } diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc new file mode 100644 index 0000000000..9ad98e5038 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc @@ -0,0 +1,172 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" + +namespace xla { +Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, + int64 param_number, + const ShapeIndex& param_index) { + // Output can't be aliased with multiple parameters. + TF_RET_CHECK(!alias_.element(output_index)); + (*alias_.mutable_element(output_index)) = + std::make_pair(param_number, param_index); + return Status::OK(); +} + +HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const { + HloInputOutputAliasProto result; + alias_.ForEachElement( + [&](const ShapeIndex& index, + const absl::optional>& data) { + if (data) { + HloInputOutputAliasProto::AliasEntryProto entry; + for (int64 i : index) { + entry.add_output_shape_index(i); + } + entry.set_parameter_number(data->first); + for (int64 i : data->second) { + entry.add_parameter_shape_index(i); + } + result.add_entries()->Swap(&entry); + } + }); + return result; +} + +StatusOr HloInputOutputAliasConfig::CreateFromProto( + const HloModule* module, const HloInputOutputAliasProto& proto) { + HloInputOutputAliasConfig result( + module->entry_computation()->root_instruction()->shape()); + for (const HloInputOutputAliasProto::AliasEntryProto& entry : + proto.entries()) { + ShapeIndex output_index(entry.output_shape_index().begin(), + entry.output_shape_index().end()); + + int64 param_number = entry.parameter_number(); + ShapeIndex param_index(entry.parameter_shape_index().begin(), + entry.parameter_shape_index().end()); + TF_RETURN_IF_ERROR( + result.SetUpAlias(output_index, param_number, param_index)); + } + + return result; +} + +string HloInputOutputAliasConfig::ToString() const { + std::vector pieces; + pieces.push_back("HloInputOutputAliasConfig"); 
+ + ForEachAlias([&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + pieces.push_back(absl::StrFormat( + " OutputIndex %s is aliased with parameter %lld at %s:", + output_index.ToString(), param_number, param_index.ToString())); + }); + + return absl::StrJoin(pieces, "\n"); +} + +bool HloInputOutputAliasConfig::ParameterHasAlias(int64 param_number) const { + bool output = false; + alias_.ForEachElement( + [&](const xla::ShapeIndex&, + absl::optional> alias) { + if (alias && alias->first == param_number) { + output = true; + } + }); + return output; +} + +absl::optional HloInputOutputAliasConfig::GetAliasedOutput( + int64 param_number, const ShapeIndex& param_index) const { + absl::optional output; + alias_.ForEachElement( + [&](const xla::ShapeIndex& output_index, + absl::optional> alias) { + if (alias && alias->first == param_number && + alias->second == param_index) { + output = output_index; + } + }); + return output; +} + +absl::optional> +HloInputOutputAliasConfig::GetAliasedParameter( + const ShapeIndex& output_index) const { + CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)); + return alias_.element(output_index); +} + +void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const { + alias_.ForEachElement( + [&](const ShapeIndex& output_index, + absl::optional> aliased) { + if (aliased) { + fn(output_index, aliased->first, aliased->second); + } + }); +} + +Status HloInputOutputAliasConfig::ForEachAliasWithStatus( + AliasFnWithStatus fn) const { + return alias_.ForEachElementWithStatus( + [&](const ShapeIndex& output_index, + absl::optional> aliased) { + if (aliased) { + TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second)); + } + return Status::OK(); + }); +} + +Status HloInputOutputAliasConfig::Verify(const HloModule& module) const { + std::vector> param_has_seen; + const HloComputation* entry = module.entry_computation(); + for (int64 i = 0; i < entry->num_parameters(); ++i) { + 
HloInstruction* param = entry->parameter_instruction(i); + param_has_seen.emplace_back(param->shape()); + } + return ForEachAliasWithStatus([&](const ShapeIndex& output_index, + int64 param_number, + const ShapeIndex& param_index) -> Status { + const HloInstruction* root = entry->root_instruction(); + + const Shape& param_shape = + entry->parameter_instruction(param_number)->shape(); + const Shape& output_shape = root->shape(); + TF_RET_CHECK(entry->num_parameters() > param_number); + TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index)); + TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index)); + + // Check that each param_number and param_index pair shows up only once. No + // input can be aliased with multiple output buffers. + TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false); + + *(param_has_seen[param_number].mutable_element(param_index)) = true; + + return Status::OK(); + }); +} + +std::ostream& operator<<(std::ostream& out, + const HloInputOutputAliasConfig& config) { + out << config.ToString(); + return out; +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h new file mode 100644 index 0000000000..02c46f65c8 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -0,0 +1,101 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ + +#include + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/shape_tree.h" +#include "tensorflow/compiler/xla/shape_util.h" + +namespace xla { + +class HloModule; + +// This class specifies the alias map from output index to parameter number and +// parameter index in the entry computation. +class HloInputOutputAliasConfig { + public: + HloInputOutputAliasConfig() = default; + + explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {} + + virtual ~HloInputOutputAliasConfig() = default; + + // Sets up alias config from `output_index` to `param_index` at + // `param_number`. + Status SetUpAlias(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index); + + // Returns true if the given parameter is aliased with one of the output + // buffers. + bool ParameterHasAlias(int64 param_number) const; + + // (De)Serializes an HloInputOutoutAliasConfig to/from an + // HloInputOutoutAliasProto. + HloInputOutputAliasProto ToProto() const; + + static StatusOr CreateFromProto( + const HloModule* module, const HloInputOutputAliasProto& proto); + + // Returns the output index that the given parameter and parameter index is + // aliased with. A nullopt is returned if there is no output that is aliased + // with the parameter number and index. + absl::optional GetAliasedOutput( + int64 param_number, const ShapeIndex& param_index) const; + + // Returns the number of parameter and index of the parameter buffer that the + // given output buffer index is aliased with. A nullopt is returned if there + // is no parameter is aliased with the specific output. 
+ absl::optional> GetAliasedParameter( + const ShapeIndex& output_index) const; + + using AliasFn = + std::function; + + // Iterates through each aliased output and input. + void ForEachAlias(AliasFn fn) const; + + using AliasFnWithStatus = + std::function; + + // Verifies that the given config is valid for the given module. + // Specifically, the config's input and output should be in-bound and size of + // the aliased buffers should match. + Status Verify(const HloModule& module) const; + + Status ForEachAliasWithStatus(AliasFnWithStatus fn) const; + + string ToString() const; + + private: + // A ShapeTree which indicates the list of buffers that's expected to be + // aliased. The key on this shape tree represents the output index. The value + // is a pair of parameter number and index into the buffer. If the value is + // nullopt, it means there is no parameter aliasing for this output. + ShapeTree>> alias_; +}; + +std::ostream& operator<<(std::ostream& out, + const HloInputOutputAliasConfig& config); + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc new file mode 100644 index 0000000000..3b61ff04e6 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc @@ -0,0 +1,184 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" + +#include +#include + +#include "absl/algorithm/container.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dce.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_ordering.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { +class HloInputOutputAliasConfigTest : public HloTestBase { + protected: + void expect_aliased(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index, + const HloInputOutputAliasConfig& config) { + absl::optional aliased_output = + config.GetAliasedOutput(param_number, param_index); + + EXPECT_TRUE(aliased_output); + EXPECT_EQ(aliased_output.value(), output_index); + + absl::optional> aliased_param = + config.GetAliasedParameter(output_index); + + EXPECT_TRUE(aliased_param); + EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index)); + } + + void expect_not_aliased(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index, + const HloInputOutputAliasConfig& config) { + absl::optional aliased_output = + config.GetAliasedOutput(param_number, param_index); + + EXPECT_FALSE(aliased_output && aliased_output == output_index); + + absl::optional> aliased_param = + 
config.GetAliasedParameter(output_index); + + EXPECT_FALSE(aliased_param && aliased_param->first == param_number && + aliased_param->second == param_index); + } +}; + +TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{})); + + expect_aliased(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{}, config); +} + +TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + param = (f32[], f32[]) parameter(0) + gte1 = f32[] get-tuple-element(%param), index=0 + gte2 = f32[] get-tuple-element(%param), index=1 + ROOT root = (f32[], f32[]) tuple(%gte1, %gte2) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{0})); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{1})); + + expect_aliased(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{0}, config); + + expect_aliased(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{1}, config); + + expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{0}, 
/*param_number=*/0, + /*param_index=*/{}, config); +} + +TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{})); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{})); + + ASSERT_IS_NOT_OK(config.Verify(*module)); +} + +TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{})); + + ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{})); +} +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 93e04eb3db..547f74a0ed 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -73,6 +73,8 @@ HloComputation* HloModule::AddComputationInternal( config_.SetDefaultComputationLayout( entry_computation_->ComputeProgramShape()); } + input_output_alias_config_ = HloInputOutputAliasConfig( + entry_computation_->root_instruction()->shape()); } if (uniquify_identifiers) { @@ -252,6 +254,9 @@ HloModuleProto HloModule::ToProto() const { if (has_schedule()) { 
*proto.mutable_schedule() = schedule().ToProto().ValueOrDie(); } + + *proto.mutable_input_output_alias() = input_output_alias_config().ToProto(); + return proto; } @@ -328,6 +333,10 @@ StatusOr> HloModule::CreateFromProto( } TF_RET_CHECK(module->entry_computation_ != nullptr); + TF_ASSIGN_OR_RETURN(module->input_output_alias_config_, + HloInputOutputAliasConfig::CreateFromProto( + module.get(), proto.input_output_alias())); + // Because we didn't uniquify the names or the ids, double-check that the // instruction and computation names and ids are unique from the proto. absl::flat_hash_set computation_names; diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 735804e827..9b9dc3ba9f 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/hlo_schedule.h" @@ -212,6 +213,15 @@ class HloModule { return result; } + // input_output_alias_config indicates the list of aliased buffers that are + // expected from the module. + HloInputOutputAliasConfig& input_output_alias_config() { + return input_output_alias_config_; + } + const HloInputOutputAliasConfig& input_output_alias_config() const { + return input_output_alias_config_; + } + // Returns the number of unique intruction ids given out. 
All ids up to // this point are guaranteed to be in the range [0..NumUniqueInstructionIds()) int NumUniqueInstructionIds() const { return next_unique_id_; } @@ -284,6 +294,10 @@ class HloModule { // sequential order of instructions for each non-fusion computation in the // module. absl::optional schedule_; + + // alias_config indicates the alias information of input/output buffers that + // are expected from the module. + HloInputOutputAliasConfig input_output_alias_config_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index be3bee5975..2902a11a42 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -1220,6 +1220,8 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(module->schedule().Verify()); } + TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module)); + return false; } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 73f541d505..51cedce7f0 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -72,7 +72,7 @@ class ShapeIndex { void push_back(int64 value) { indices_.push_back(value); } void pop_back() { indices_.pop_back(); } - // push_front is O(n^2), but shapes don't usually have a ton of dimensions. + // push_front is O(n), but shapes don't usually have a ton of dimensions. 
void push_front(int64 value) { indices_.insert(indices_.begin(), value); } using container_type = absl::InlinedVector; -- GitLab From a593c6885bec8c545665ec2f25d794777be55ba9 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Mon, 8 Oct 2018 21:23:08 -0700 Subject: [PATCH 102/411] Automated rollback of commit 07df147ab20c4a5329148e5fb5f7f6b187cb73a4 PiperOrigin-RevId: 216299809 --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 7488cedec5..225c0a91e3 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -114,8 +114,7 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( MK_OPT("scoped_allocator", new ScopedAllocatorOptimizer(cfg_.scoped_allocator_optimization(), cfg_.scoped_allocator_opts())); - MK_OPT("pin_to_host", - new PinToHostOptimizer(cfg_.pin_to_host_optimization())); + MK_OPT("small_op", new PinToHostOptimizer(cfg_.pin_to_host_optimization())); return std::unique_ptr(); } @@ -162,7 +161,7 @@ Status MetaOptimizer::InitializeOptimizers( if (cfg_.remapping() != RewriterConfig::OFF) { optimizers->push_back(MakeUnique(cfg_.remapping())); } - if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) { + if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) { optimizers->push_back(MakeUnique()); } if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { @@ -592,7 +591,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || cfg.debug_stripper() == RewriterConfig::ON || cfg.scoped_allocator_optimization() == RewriterConfig::ON || - cfg.pin_to_host_optimization() != RewriterConfig::OFF || + cfg.pin_to_host_optimization() == RewriterConfig::ON || !cfg.optimizers().empty() || !cfg.custom_optimizers().empty(); } -- GitLab 
From d1f0494b89a31298df7743018c0a3fa388ac16a2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 22:13:04 -0700 Subject: [PATCH 103/411] Add Floor_mod to schema. PiperOrigin-RevId: 216303340 --- tensorflow/contrib/lite/builtin_ops.h | 1 + .../lite/core/api/flatbuffer_conversions.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 5 + .../contrib/lite/schema/schema_generated.h | 124 +++++++++++++++++- 5 files changed, 126 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 7809d114e2..6117cbf9f1 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -120,6 +120,7 @@ typedef enum { kTfLiteBuiltinSquare = 92, kTfLiteBuiltinZerosLike = 93, kTfLiteBuiltinFill = 94, + kTfLiteBuiltinFloorMod = 95, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc index b092e5ee54..890d9c04bb 100644 --- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc @@ -651,6 +651,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_SQUARE: case BuiltinOperator_ZEROS_LIKE: case BuiltinOperator_FILL: + case BuiltinOperator_FLOOR_MOD: break; } return kTfLiteOk; diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index f23a0ccb80..c7005eb53e 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -679,6 +679,7 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_SQUARE: case tflite::BuiltinOperator_ZEROS_LIKE: case tflite::BuiltinOperator_FILL: + case tflite::BuiltinOperator_FLOOR_MOD: logError("Op code %d is currently not delegated to NNAPI", builtin); return 
kTfLiteError; break; diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index cb7a282743..2b36209e5f 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -176,6 +176,7 @@ enum BuiltinOperator : byte { SQUARE = 92, ZEROS_LIKE = 93, FILL = 94, + FLOOR_MOD = 95, } // Options for the builtin operators. @@ -251,6 +252,7 @@ union BuiltinOptions { BidirectionalSequenceLSTMOptions, BidirectionalSequenceRNNOptions, UnidirectionalSequenceLSTMOptions, + FloorModOptions, } enum Padding : byte { SAME, VALID } @@ -618,6 +620,9 @@ table ZerosLikeOptions { table FillOptions { } +table FloorModOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index e7b7a59def..3aaa99ec55 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -241,6 +241,9 @@ struct ZerosLikeOptionsT; struct FillOptions; struct FillOptionsT; +struct FloorModOptions; +struct FloorModOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -401,11 +404,12 @@ enum BuiltinOperator { BuiltinOperator_SQUARE = 92, BuiltinOperator_ZEROS_LIKE = 93, BuiltinOperator_FILL = 94, + BuiltinOperator_FLOOR_MOD = 95, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_FILL + BuiltinOperator_MAX = BuiltinOperator_FLOOR_MOD }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[95] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -500,7 +504,8 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] { BuiltinOperator_REDUCE_ANY, BuiltinOperator_SQUARE, 
BuiltinOperator_ZEROS_LIKE, - BuiltinOperator_FILL + BuiltinOperator_FILL, + BuiltinOperator_FLOOR_MOD }; return values; } @@ -602,6 +607,7 @@ inline const char * const *EnumNamesBuiltinOperator() { "SQUARE", "ZEROS_LIKE", "FILL", + "FLOOR_MOD", nullptr }; return names; @@ -685,11 +691,12 @@ enum BuiltinOptions { BuiltinOptions_BidirectionalSequenceLSTMOptions = 69, BuiltinOptions_BidirectionalSequenceRNNOptions = 70, BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71, + BuiltinOptions_FloorModOptions = 72, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_UnidirectionalSequenceLSTMOptions + BuiltinOptions_MAX = BuiltinOptions_FloorModOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[73] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -762,7 +769,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] { BuiltinOptions_FillOptions, BuiltinOptions_BidirectionalSequenceLSTMOptions, BuiltinOptions_BidirectionalSequenceRNNOptions, - BuiltinOptions_UnidirectionalSequenceLSTMOptions + BuiltinOptions_UnidirectionalSequenceLSTMOptions, + BuiltinOptions_FloorModOptions }; return values; } @@ -841,6 +849,7 @@ inline const char * const *EnumNamesBuiltinOptions() { "BidirectionalSequenceLSTMOptions", "BidirectionalSequenceRNNOptions", "UnidirectionalSequenceLSTMOptions", + "FloorModOptions", nullptr }; return names; @@ -1139,6 +1148,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1738,6 +1751,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? 
reinterpret_cast(value) : nullptr; } + FloorModOptionsT *AsFloorModOptions() { + return type == BuiltinOptions_FloorModOptions ? + reinterpret_cast(value) : nullptr; + } + const FloorModOptionsT *AsFloorModOptions() const { + return type == BuiltinOptions_FloorModOptions ? + reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -6241,6 +6262,46 @@ inline flatbuffers::Offset CreateFillOptions( flatbuffers::Offset CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct FloorModOptionsT : public flatbuffers::NativeTable { + typedef FloorModOptions TableType; + FloorModOptionsT() { + } +}; + +struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FloorModOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + FloorModOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct FloorModOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFloorModOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + FloorModOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + 
+flatbuffers::Offset CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; @@ -6587,6 +6648,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const { return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast(builtin_options()) : nullptr; } + const FloorModOptions *builtin_options_as_FloorModOptions() const { + return builtin_options_type() == BuiltinOptions_FloorModOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -6902,6 +6966,10 @@ template<> inline const UnidirectionalSequenceLSTMOptions *Operator::builtin_opt return builtin_options_as_UnidirectionalSequenceLSTMOptions(); } +template<> inline const FloorModOptions *Operator::builtin_options_as() const { + return builtin_options_as_FloorModOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -9286,6 +9354,29 @@ inline flatbuffers::Offset CreateFillOptions(flatbuffers::FlatBuffe _fbb); } +inline FloorModOptionsT *FloorModOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new FloorModOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void FloorModOptions::UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset FloorModOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFloorModOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset 
CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FloorModOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateFloorModOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -9759,6 +9850,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -10061,6 +10156,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -10351,6 +10450,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(value); + return CreateFloorModOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -10641,6 +10744,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_FloorModOptions: { + value = new FloorModOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -11003,6 +11110,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } 
+ case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; -- GitLab From e27ee15fa45a5f4e43e10ed1fe0eb3a1feb4253a Mon Sep 17 00:00:00 2001 From: Peter Ma Date: Mon, 8 Oct 2018 23:12:08 -0700 Subject: [PATCH 104/411] Refactor CalculateOutputSize() from VirtualScheduler protected member function to utils; Refactor EstimateSize() from memory_optimizer.cc to utils; some small changes for readability improvement PiperOrigin-RevId: 216307257 --- tensorflow/core/grappler/costs/BUILD | 1 + tensorflow/core/grappler/costs/utils.cc | 40 ++++++- tensorflow/core/grappler/costs/utils.h | 11 ++ tensorflow/core/grappler/costs/utils_test.cc | 112 +++++++++++++----- .../core/grappler/costs/virtual_scheduler.cc | 48 ++------ .../core/grappler/costs/virtual_scheduler.h | 22 ++-- .../grappler/costs/virtual_scheduler_test.cc | 48 +------- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/memory_optimizer.cc | 26 +--- 9 files changed, 161 insertions(+), 148 deletions(-) diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index f3dc2c2091..46eacd3a06 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -236,6 +236,7 @@ tf_cc_test( name = "virtual_scheduler_test", srcs = ["virtual_scheduler_test.cc"], deps = [ + ":utils", ":virtual_placer", ":virtual_scheduler", "//tensorflow/cc:cc_ops", diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index 5415324b48..2fcadf1de3 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -74,7 +74,8 @@ static std::vector ExtractTensors(const AttrValue& attr_value) { } break; } - default: {} + default: { + } } return tensors; } @@ -201,6 +202,43 @@ std::vector FindInputFeatures( return inputs; } +int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) { + int64 size = 
DataTypeSize(BaseType(prop.dtype())); + TensorShapeProto shape = prop.shape(); + + // Can't infer the size if the rank is unknown. It has to be at least a + // scalar though. + if (shape.unknown_rank()) { + LOG(WARNING) << "CalculateTensorSize() -- unknown rank"; + return size; + } + + // If one of the dimensions is unknown statically, assume it's at least one. + for (int i = 0; i < shape.dim_size(); ++i) { + if (shape.dim(i).size() < 0) { + shape.mutable_dim(i)->set_size(1); + LOG(WARNING) << "CalculateTensorSize() -- unknown dim: " << i; + } + } + + int64 num_elems = TensorShape(shape).num_elements(); + return num_elems * size; +} + +int64 CalculateOutputSize( + const std::vector& output_properties, + const int port_num) { + if (port_num < 0) return 4; // 4B for control dependency. + + if (port_num >= output_properties.size()) { + LOG(ERROR) << "CalculateOutputSize() -- port_num: " << port_num + << " >= output_properties.size(): " << output_properties.size(); + return 0; + } + + return CalculateTensorSize(output_properties[port_num]); +} + DeviceProperties GetDeviceInfo(const string& device_str) { DeviceProperties unknown; unknown.set_type("UNKNOWN"); diff --git a/tensorflow/core/grappler/costs/utils.h b/tensorflow/core/grappler/costs/utils.h index 5fd6717712..ea64e5a41d 100644 --- a/tensorflow/core/grappler/costs/utils.h +++ b/tensorflow/core/grappler/costs/utils.h @@ -43,6 +43,17 @@ std::vector FindInputFeatures( const std::unordered_map& name_to_cost, const std::unordered_map& name_to_node); +// Returns the size of tensor (unit: bytes). For tensor shape with unknown rank, +// it assumes the tensor to be scalar. For any unknown dimension, it assumes +// size one. +int64 CalculateTensorSize(const OpInfo::TensorProperties& prop); + +// Returns the size of output at port_num (unit: bytes). A special case is +// port_num -1, which is for control dependency and assumed to be 4 bytes. 
+int64 CalculateOutputSize( + const std::vector& output_properties, + int port_num); + // Returns the DeviceProperties of the device on which 'node' runs. DeviceProperties GetDeviceInfo(const CostGraphDef::Node& node); DeviceProperties GetDeviceInfo(const string& device_str); diff --git a/tensorflow/core/grappler/costs/utils_test.cc b/tensorflow/core/grappler/costs/utils_test.cc index baa654f475..db5c11f0fe 100644 --- a/tensorflow/core/grappler/costs/utils_test.cc +++ b/tensorflow/core/grappler/costs/utils_test.cc @@ -26,36 +26,42 @@ limitations under the License. namespace tensorflow { namespace grappler { -class UtilsTest : public ::testing::Test { - public: - void CreateConstOp(const string& name, std::initializer_list dims, - NodeDef* node) { - Tensor tensor(DT_FLOAT, TensorShape(dims)); - for (int64 i = 0; i < tensor.NumElements(); ++i) { - tensor.flat()(i) = i / 10.0f; - } - TF_CHECK_OK(NodeDefBuilder(name, "Const") - .Attr("dtype", DT_FLOAT) - .Attr("value", tensor) - .Finalize(node)); - } +namespace { - void CreateConstSizesOp(const string& name, const std::vector& sizes, - NodeDef* node) { - TensorShape shape; - shape.AddDim(sizes.size()); - Tensor tensor(DT_INT32, shape); - for (int64 i = 0; i < tensor.NumElements(); ++i) { - tensor.flat()(i) = sizes[i]; - } - TF_CHECK_OK(NodeDefBuilder(name, "Const") - .Attr("dtype", DT_INT32) - .Attr("value", tensor) - .Finalize(node)); - } -}; +void CreateConstOp(const string& name, std::initializer_list dims, + NodeDef* node) { + Tensor tensor(DT_FLOAT, TensorShape(dims)); + for (int64 i = 0; i < tensor.NumElements(); ++i) + tensor.flat()(i) = i / 10.0f; + TF_CHECK_OK(NodeDefBuilder(name, "Const") + .Attr("dtype", DT_FLOAT) + .Attr("value", tensor) + .Finalize(node)); +} -TEST_F(UtilsTest, ConvOpInfo) { +void CreateConstSizesOp(const string& name, const std::vector& sizes, + NodeDef* node) { + TensorShape shape; + shape.AddDim(sizes.size()); + Tensor tensor(DT_INT32, shape); + for (int64 i = 0; i < 
tensor.NumElements(); ++i) + tensor.flat()(i) = sizes[i]; + TF_CHECK_OK(NodeDefBuilder(name, "Const") + .Attr("dtype", DT_INT32) + .Attr("value", tensor) + .Finalize(node)); +} + +// Helper method for converting shapes vector to TensorProperty. +OpInfo::TensorProperties ShapeToTensorProperty(const std::vector& shapes, + const DataType& data_type) { + OpInfo::TensorProperties prop; + prop.set_dtype(data_type); + for (int shape : shapes) prop.mutable_shape()->add_dim()->set_size(shape); + return prop; +} + +TEST(UtilsTest, ConvOpInfo) { int batch = 32; int rows = 7; int cols = 9; @@ -146,7 +152,7 @@ TEST_F(UtilsTest, ConvOpInfo) { } } -TEST_F(UtilsTest, TestSkipControlInput) { +TEST(UtilsTest, TestSkipControlInput) { GraphDef graph; TF_CHECK_OK(NodeDefBuilder("constant", "Const") .Attr("dtype", DT_INT32) @@ -172,6 +178,52 @@ TEST_F(UtilsTest, TestSkipControlInput) { EXPECT_TRUE(node_found); } +TEST(UtilsTest, CalculateTensorSize) { + // Test normal usage. + EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1, + CalculateTensorSize(ShapeToTensorProperty({1}, DT_FLOAT))); + EXPECT_EQ(DataTypeSize(DT_FLOAT) * 4 * 4, + CalculateTensorSize(ShapeToTensorProperty({4, 4}, DT_FLOAT))); + EXPECT_EQ(DataTypeSize(DT_HALF) * 10 * 10 * 10, + CalculateTensorSize(ShapeToTensorProperty({10, 10, 10}, DT_HALF))); + EXPECT_EQ( + DataTypeSize(DT_FLOAT) * 100 * 7 * 8 * 99, + CalculateTensorSize(ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT))); + + // Test unknown rank: assumes the tensor to be a scalar. + OpInfo::TensorProperties t = ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT); + t.mutable_shape()->set_unknown_rank(true); + EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1, CalculateTensorSize(t)); + + // Test unknown shape: assumes unknown shape (-1) to have size 1. 
+ EXPECT_EQ( + DataTypeSize(DT_FLOAT) * 1 * 7 * 8 * 99, + CalculateTensorSize(ShapeToTensorProperty({-1, 7, 8, 99}, DT_FLOAT))); + EXPECT_EQ( + DataTypeSize(DT_FLOAT) * 1 * 7 * 1 * 99, + CalculateTensorSize(ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT))); +} + +TEST(UtilsTest, CalculateOutputSize) { + // Create a set of tensor properties. + std::vector output = { + ShapeToTensorProperty({4, 4}, DT_FLOAT), // 0 + ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT) // 1 + }; + + // Test valid outputs. + EXPECT_EQ(DataTypeSize(DT_FLOAT) * 4 * 4, CalculateOutputSize(output, 0)); + EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1 * 7 * 1 * 99, + CalculateOutputSize(output, 1)); + + // port_num -1 is for control dependency: hard coded 4B. + EXPECT_EQ(4, CalculateOutputSize(output, -1)); + + // Invalid port_num (though it may be an error) shall yield zero + // output size. + EXPECT_EQ(0, CalculateOutputSize(output, 2)); +} + // Class for testing TensorSizeHistogram. class TestTensorSizeHistogram : public TensorSizeHistogram { public: @@ -285,5 +337,7 @@ TEST(DeviceClassTest, GetDeviceClassForNonChannelDevice) { EXPECT_EQ("//GPU", GetDeviceClassForNonChannelDevice("/device:GPU:7")); } +} // namespace + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 037a823096..5b93fb128f 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -473,6 +473,7 @@ Status VirtualScheduler::Init() { VLOG(1) << "Some feed nodes were not consumed by the fetch fanin: " << str_util::Join(feed_nodes, ","); } + initialized_ = true; return Status::OK(); } @@ -695,38 +696,6 @@ NodeState& VirtualScheduler::GetNodeStateOrCreateIt(const NodeDef* node) { return it->second; } -int64 VirtualScheduler::CalculateOutputSize( - const std::vector& output_properties, - const int port_num) const { - if (port_num < 0) { - 
return 4; // 4B for control dependency. - } - - if (port_num >= output_properties.size()) { - VLOG(3) << "VirtualScheduler::CalculateOutputSize() -- " - << "port_num: " << port_num - << " >= output_properties.size(): " << output_properties.size(); - return 0; - } - - const auto& output = output_properties[port_num]; - int64 output_size = DataTypeSize(BaseType(output.dtype())); - - for (const auto& dim : output.shape().dim()) { - auto dim_size = dim.size(); - if (dim_size < 0) { - // Zero output size if there's any unknown dim. - output_size = 0; - VLOG(3) << "VirtualScheduler::CalculateOutputSize() -- " - << "unknown dim: " << output_size; - break; - } - output_size *= dim_size; - } - - return output_size; -} - Costs& VirtualScheduler::FindOrCreateZero(const string& op_name, std::map* op_cost) { auto it = op_cost->find(op_name); @@ -744,7 +713,10 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { const NodeDef* node = ready_nodes_->GetCurrNode(); const string& op_name = node->op(); - // Also keep track of op counts and times per op (with their shapes). + auto& op_cost = FindOrCreateZero(op_name, &op_to_cost_); + op_cost = CombineCosts(op_cost, node_costs); + + // Also keep track of op counts and costs per op (with their shapes). OpContext op_context = GetCurrNode(); string node_description = GetOpDescription(op_context.op_info); op_counts_[node_description] += 1; @@ -752,9 +724,6 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { std::make_pair(node_costs.execution_time.asMicroSeconds().count(), !node_costs.inaccurate); - auto& op_cost = FindOrCreateZero(op_name, &op_to_cost_); - op_cost = CombineCosts(op_cost, node_costs); - // Update node and device states. 
auto& node_state = node_map_[node]; auto& device = device_[node_state.device_name]; @@ -795,7 +764,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { << ", scheduled: " << node_state.time_scheduled.count() << ", finished: " << node_state.time_finished.count(); - // Increment num_inputs_ready of the output nodes + // Increment num_inputs_ready of the output nodes and maybe add to ready nodes for (const auto& port_num_output_pair : node_state.outputs) { for (auto* output_node : port_num_output_pair.second) { auto& output_state = node_map_[output_node]; @@ -812,7 +781,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { } } - // Increment num_outputs_executed of the input nodes. + // Increment num_outputs_executed of the input nodes and maybe update memory. for (const auto& input_port : node_state.inputs) { auto* input = input_port.first; auto port = input_port.second; @@ -841,7 +810,6 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { } } - // Remove the current node; assume FIFO. ready_nodes_->RemoveCurrNode(); return !ready_nodes_->Empty(); @@ -1007,7 +975,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) { return Summary(); } - // Fill RunMetadata. + // Fill RunMetadata's step_stats and partition_graphs fields. StepStats* stepstats = metadata->mutable_step_stats(); for (const auto& device : device_) { GraphDef* device_partition_graph = metadata->add_partition_graphs(); diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h index 0e66e8a463..bead84af29 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.h +++ b/tensorflow/core/grappler/costs/virtual_scheduler.h @@ -107,10 +107,10 @@ struct DeviceState { mem_usage_snapshot_at_peak; Costs device_costs; - std::map op_to_cost; // Per-op cost. - std::map op_to_memory; // Per-op memory usage at peak usage. 
- int64 memory_usage; - int64 max_memory_usage; + std::map op_to_cost; // Per-op cost. + + int64 memory_usage; // Current temporary memory usage + int64 max_memory_usage; // Max temporary memory usage DeviceState() { device_costs = Costs::ZeroCosts(); @@ -283,13 +283,6 @@ class VirtualScheduler { return &node_map_; } - protected: - // Returns the size of output at port_num (unit: bytes). A special case is - // port_num -1, which is for control dependency and assumed to be 4 bytes. - int64 CalculateOutputSize( - const std::vector& output_properties, - const int port_num) const; - private: // Constants. const string kAttrInputSrc = "input_source_"; @@ -321,8 +314,11 @@ class VirtualScheduler { std::vector> additional_nodes_; // Stats: - std::map op_counts_; // Op counts with key with input shape. - // Individual op costs (with input shapes). + // Op counts with key with input shape. + // Example key: "[Op=AssignSub, input_shapes=[[7,1,160,160][7,1,160,160]]" + std::map op_counts_; + // Individual op costs with key with input shape. + // Integer field for execution time in micro seconds. // Boolean field for whether the cost is accurate. std::map> op_costs_; diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc index 80889afc86..99272dd7e9 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc @@ -19,12 +19,14 @@ limitations under the License. #include "tensorflow/core/framework/tensor_description.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/grappler/clusters/virtual_cluster.h" +#include "tensorflow/core/grappler/costs/utils.h" #include "tensorflow/core/grappler/costs/virtual_placer.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { namespace grappler { + // Class for testing virtual scheduler. 
class TestVirtualScheduler : public VirtualScheduler { public: @@ -33,7 +35,6 @@ class TestVirtualScheduler : public VirtualScheduler { : VirtualScheduler(grappler_item, use_static_shapes, cluster, &ready_node_manager_) {} - FRIEND_TEST(VirtualSchedulerTest, CalculateOutputSize); FRIEND_TEST(VirtualSchedulerTest, MemoryUsage); FRIEND_TEST(VirtualSchedulerTest, ControlDependency); FRIEND_TEST(VirtualSchedulerTest, ComplexDependency); @@ -1034,17 +1035,6 @@ versions { } } - // Helper method for converting shape vector to TensorProperty. - OpInfo::TensorProperties ShapeToTensorProperty( - const std::vector shape, const DataType& data_type) const { - OpInfo::TensorProperties tensor_property; - tensor_property.set_dtype(data_type); - for (const auto& x : shape) { - tensor_property.mutable_shape()->add_dim()->set_size(x); - } - return tensor_property; - } - // SetUp() inits cluster_ and placer_. std::unique_ptr cluster_; std::unique_ptr placer_; @@ -1729,38 +1719,6 @@ TEST_F(VirtualSchedulerTest, InitAndBasicScheduling) { EXPECT_EQ(2, ops_executed["c1"].op_info.inputs_size()); } -TEST_F(VirtualSchedulerTest, CalculateOutputSize) { - // Init. - CreateGrapplerItemWithAddN(); - InitScheduler(); - - // Create a set of tensor properties. - std::vector output; - output.push_back(ShapeToTensorProperty({4, 4}, DT_FLOAT)); // 0 - output.push_back(ShapeToTensorProperty({1}, DT_FLOAT)); // 1 - output.push_back(ShapeToTensorProperty({10, 10, 10}, DT_HALF)); // 2 - output.push_back(ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT)); // 3 - output.push_back(ShapeToTensorProperty({-1, 7, 8, 99}, DT_FLOAT)); // 4 - output.push_back(ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT)); // 4 - - // port_num -1 is for control dependency: hard coded 4B. - EXPECT_EQ(4, scheduler_->CalculateOutputSize(output, -1)); - - // Test valid outputs. 
- EXPECT_EQ(4 * 4 * 4, scheduler_->CalculateOutputSize(output, 0)); - EXPECT_EQ(4 * 1, scheduler_->CalculateOutputSize(output, 1)); - EXPECT_EQ(2 * 10 * 10 * 10, scheduler_->CalculateOutputSize(output, 2)); - EXPECT_EQ(4 * 100 * 7 * 8 * 99, scheduler_->CalculateOutputSize(output, 3)); - - // Any unknown shape (-1) shall yield zero output size. - EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 4)); - EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 5)); - - // Invalid port_num (though it may be an error) shall yield zero - // output size. - EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 6)); -} - TEST_F(VirtualSchedulerTest, MemoryUsage) { // Init. CreateGrapplerItemWithAddN(); @@ -2041,7 +1999,7 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) { for (const auto& output_property : output_properties_) { output_properties.push_back(output_property); } - return scheduler_->CalculateOutputSize(output_properties, 0); + return CalculateOutputSize(output_properties, 0); }; // Validate transfer size. diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index c708f84948..e898377ded 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -423,6 +423,7 @@ cc_library( "//tensorflow/core/grappler/clusters:virtual_cluster", "//tensorflow/core/grappler/costs:graph_memory", "//tensorflow/core/grappler/costs:graph_properties", + "//tensorflow/core/grappler/costs:utils", "//tensorflow/core/grappler/utils:topological_sort", "//tensorflow/core/grappler/utils:traversal", ], diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index c775a26914..73f0977242 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -29,6 +29,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/clusters/virtual_cluster.h" #include "tensorflow/core/grappler/costs/graph_memory.h" #include "tensorflow/core/grappler/costs/graph_properties.h" +#include "tensorflow/core/grappler/costs/utils.h" #include "tensorflow/core/grappler/graph_view.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" @@ -43,6 +44,8 @@ limitations under the License. namespace tensorflow { namespace grappler { +namespace { + // Prefix added to nodes which are recomputed. const char* kRecomputedNodePrefix = "Recomputed"; const char* kRecomputeTriggerNodePrefix = "RecomputeTrigger"; @@ -744,25 +747,6 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap, return Status::OK(); } -static int64 EstimateSize(const OpInfo::TensorProperties& t) { - DataType dtype = t.dtype(); - int64 size = DataTypeSize(dtype); - TensorShapeProto shape = t.shape(); - if (shape.unknown_rank()) { - // Can't infer the size if the rank is unknown. It has to be at least a - // scalar though. - return size; - } - // If one of the dimensions is unknown statically, assume it's at least one. - for (int i = 0; i < shape.dim_size(); ++i) { - if (shape.dim(i).size() < 0) { - shape.mutable_dim(i)->set_size(1); - } - } - int64 num_elems = TensorShape(shape).num_elements(); - return num_elems * size; -} - struct SwapInfo { std::vector inputs_to_swap; Costs::NanoSeconds time_to_swap = 0; @@ -1149,7 +1133,7 @@ bool SwappingPass(RewriterConfig::MemOptType optimization_level, int64 bytes_to_swap = 0; for (int64 input_id : swap_info.inputs_to_swap) { const OpInfo::TensorProperties& t = props[input_id]; - bytes_to_swap += EstimateSize(t); + bytes_to_swap += CalculateTensorSize(t); } // Let's assume we're going to swap over PCIe running at 16 GBps. 
swap_info.time_to_swap = bytes_to_swap / 16; @@ -1299,6 +1283,8 @@ Status RelaxAllocatorConstraints(GraphDef* optimized_graph) { return Status::OK(); } +} // namespace + Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { *optimized_graph = item.graph; -- GitLab From 129bb5e845ccb2ab6339e85d39545800dac6ca33 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 23:42:02 -0700 Subject: [PATCH 105/411] Automated rollback of commit 5f308cb408eb46ec9af0546be6b9ae1d5166b185 PiperOrigin-RevId: 216309111 --- tensorflow/core/grappler/op_types.cc | 22 +-- .../optimizers/pin_to_host_optimizer.cc | 162 ++++++------------ .../optimizers/pin_to_host_optimizer.h | 4 +- .../optimizers/pin_to_host_optimizer_test.cc | 76 +++----- 4 files changed, 85 insertions(+), 179 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index cbf5c8e038..1b5a215987 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -102,19 +102,15 @@ bool IsConjugateTranspose(const NodeDef& node) { } bool IsControlFlow(const NodeDef& node) { - // TODO(williamchan): Add a microbenchmark to compare FlatSet vs. iterative - // string comparison. 
- static const gtl::FlatSet* const kControFlowOps = - CHECK_NOTNULL((new gtl::FlatSet{ - "ControlTrigger", - "Enter", - "Exit", - "LoopCond", - "Merge", - "NextIteration", - "Switch", - })); - return kControFlowOps->count(node.op()) > 0; + // clang-format off + return node.op() == "ControlTrigger" || + node.op() == "Enter" || + node.op() == "Exit" || + node.op() == "LoopCond" || + node.op() == "Merge" || + node.op() == "NextIteration" || + node.op() == "Switch"; + // clang-format on } bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; } diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc index 29a3b2b74c..8ed4271fa4 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc @@ -25,29 +25,16 @@ limitations under the License. #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { namespace grappler { - namespace internal { -namespace { // TODO(williamchan): Change this constant to be something smarter, maybe // dynamically determined. constexpr int64 kTensorMaxSize = 64; -struct OpDevicePortHasher { - std::size_t operator()(const std::tuple& x) const { - uint64 code = Hash64Combine(Hash64(std::get<0>(x)), Hash64(std::get<1>(x))); - - return Hash64Combine(code, hash()(std::get<2>(x))); - } -}; -using OpDevicePortOnHostMap = - gtl::FlatMap, bool, OpDevicePortHasher>; - // All the nodes that should be blacklisted and not swapped. bool IsBlacklisted(const NodeDef& node) { return @@ -95,10 +82,10 @@ Status TryFindKernelDef(const std::vector& devices, // Checks if a node's output port is host friendly. // Roughly this means checking if the output port is on Host memory. 
-Status IsNodeOutputPortHostFriendly( - const GraphView& graph, GraphProperties* properties, const NodeDef& node, - int port_id, OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache, - bool* is_candidate) { +Status IsNodeOutputPortHostFriendly(const GraphView& graph, + GraphProperties* properties, + const NodeDef& node, int port_id, + bool* is_candidate) { *is_candidate = false; // Make sure we are not a blacklisted op. @@ -130,8 +117,7 @@ Status IsNodeOutputPortHostFriendly( for (const auto& fanin : graph.GetFanins(node, false)) { bool fanin_candidate = false; TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly( - graph, properties, *fanin.node, fanin.port_id, - op_device_outport_pinned_to_host_cache, &fanin_candidate)); + graph, properties, *fanin.node, fanin.port_id, &fanin_candidate)); if (!fanin_candidate) { return Status::OK(); } @@ -146,22 +132,11 @@ Status IsNodeOutputPortHostFriendly( return Status::OK(); } - // Check `op_device_outport_pinned_to_host_cache` for our - // {op, device, port_id} combo to see if the arg is pinned on Host. - const std::tuple cache_key(node.op(), node.device(), - port_id); - auto it = op_device_outport_pinned_to_host_cache->find(cache_key); - if (it != op_device_outport_pinned_to_host_cache->end()) { - *is_candidate = it->second; - return Status::OK(); - } - // Check if op's output port is pinned to HostMemory. 
const OpDef* op = nullptr; Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op); if (!s.ok()) { LOG(WARNING) << "Could not find OpDef for : " << node.op(); - op_device_outport_pinned_to_host_cache->emplace(cache_key, false); return Status::OK(); } @@ -171,7 +146,6 @@ Status IsNodeOutputPortHostFriendly( LOG(WARNING) << "Invalid port: " << port_id << "!\n" << node.DebugString() << "\n" << op->DebugString(); - op_device_outport_pinned_to_host_cache->emplace(cache_key, false); return Status::OK(); } @@ -181,7 +155,6 @@ Status IsNodeOutputPortHostFriendly( &kernel); if (!s.ok()) { LOG(INFO) << "Could not find KernelDef for: " << node.op(); - op_device_outport_pinned_to_host_cache->emplace(cache_key, false); return Status::OK(); } @@ -193,35 +166,22 @@ Status IsNodeOutputPortHostFriendly( } } - op_device_outport_pinned_to_host_cache->emplace(cache_key, *is_candidate); - return Status::OK(); } // Checks if a node's input port is Host friendly. // Roughly this means checking if the input port is on Host memory. -bool IsNodeInputPortHostFriendly( - const NodeDef& node, int port_id, - OpDevicePortOnHostMap* op_device_inport_pinned_to_host_cache) { +bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) { // If node is on Host, assume its inputs are Host friendly. if (str_util::StrContains(node.device(), DEVICE_CPU)) { return true; } - // Check `op_device_inport_pinned_to_host_cache` for our - // {op, device, port_id} combo to see if the arg is pinned on Host. - std::tuple cache_key(node.op(), node.device(), port_id); - auto it = op_device_inport_pinned_to_host_cache->find(cache_key); - if (it != op_device_inport_pinned_to_host_cache->end()) { - return it->second; - } - // Check if op's input port is pinned to HostMemory. 
const OpDef* op = nullptr; Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op); if (!s.ok()) { LOG(WARNING) << "Could not find OpDef for : " << node.op(); - op_device_inport_pinned_to_host_cache->emplace(cache_key, false); return false; } const int input_arg_id = OpInputPortIdToArgId(node, *op, port_id); @@ -232,20 +192,16 @@ bool IsNodeInputPortHostFriendly( {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node, &kernel); if (!s.ok()) { LOG(INFO) << "Could not find KernelDef for: " << node.op(); - op_device_inport_pinned_to_host_cache->emplace(cache_key, false); return false; } // Check if the input_arg is pinned to Host. for (const string& host_memory_arg : kernel->host_memory_arg()) { if (op->input_arg(input_arg_id).name() == host_memory_arg) { - op_device_inport_pinned_to_host_cache->emplace(cache_key, true); return true; } } - op_device_inport_pinned_to_host_cache->emplace(cache_key, false); - return false; } @@ -255,29 +211,38 @@ bool IsNodeInputPortHostFriendly( // 2] Check if node can run on Host. // 3] Check all input/outputs are Host "friendly" (atm, friendly means small, // ints, and pinned to Host). -Status IsNodeHostCandidate( - const GraphView& graph, GraphProperties* properties, const NodeDef& node, - OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache, - bool* is_candidate) { +Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties, + const NodeDef& node, bool* is_candidate) { *is_candidate = false; - // Skip these node types. - if (IsBlacklisted(node)) { - return Status::OK(); - } - // Check if node already on CPU. if (str_util::StrContains(node.device(), DEVICE_CPU)) { *is_candidate = true; return Status::OK(); } + // Skip these node types. + if (IsBlacklisted(node)) { + return Status::OK(); + } + // Check the node can be run on CPU. Status s = TryFindKernelDef({DEVICE_CPU}, node, nullptr); if (!s.ok()) { return Status::OK(); } + // Check all inputs are Host friendly. 
+ for (const GraphView::OutputPort& fanin : + graph.GetFanins(node, /*include_controlling_nodes=*/false)) { + bool fanin_candidate = false; + TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly( + graph, properties, *fanin.node, fanin.port_id, &fanin_candidate)); + if (!fanin_candidate) { + return Status::OK(); + } + } + // Check all outputs are Host friendly. if (!properties->has_properties()) { // This is an expensive call, call it lazily. @@ -290,42 +255,16 @@ Status IsNodeHostCandidate( } } - // Check all inputs are Host friendly. - for (const GraphView::OutputPort& fanin : - graph.GetFanins(node, /*include_controlling_nodes=*/false)) { - bool fanin_candidate = false; - TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly( - graph, properties, *fanin.node, fanin.port_id, - op_device_outport_pinned_to_host_cache, &fanin_candidate)); - if (!fanin_candidate) { - return Status::OK(); - } - } - *is_candidate = true; return Status::OK(); } -bool IsTPUGraphDef(const GraphDef& def) { - for (const auto& node : def.node()) { - if (node.op() == "TPUCompile" || node.op() == "TPUExecute" || - node.op() == "TPUPartitionedCall") { - return true; - } - } - return false; -} -} // end namespace - -// Tries to swap `device` to a Host device from `devices`. Returns true iff -// there was a swap. -bool TrySwapToHostDevice(const gtl::FlatSet& devices, - bool has_device_cpu, string* device) { +string TryFindHostDevice(const gtl::FlatSet& devices, + bool has_device_cpu, const string& device) { // Force this node onto the CPU. - if (device->empty() && has_device_cpu) { - *device = "/device:CPU:0"; - return true; - } else if (str_util::StrContains(*device, DEVICE_GPU)) { + if (device.empty() && has_device_cpu) { + return "/device:CPU:0"; + } else if (str_util::StrContains(device, DEVICE_GPU)) { // Sometimes the cluster can have: // devices = {"/device:CPU:0", "/device:XLA_GPU:0"} // and we need to handle them properly. 
@@ -333,19 +272,27 @@ bool TrySwapToHostDevice(const gtl::FlatSet& devices, {std::pair("GPU", "CPU:0"), std::pair("/device", "/device:CPU:0")}) { const string device_host = - strings::StrCat(device->substr(0, device->rfind(device_match.first)), + strings::StrCat(device.substr(0, device.rfind(device_match.first)), device_match.second); if (devices.find(device_host) != devices.end()) { - *device = device_host; - return true; + return device_host; } } } - // We couldn't find an appropriate Host device, return false. - return false; + // We couldn't find an appropriate Host device, return original device. + return device; } +bool IsTPUGraphDef(const GraphDef& def) { + for (const auto& node : def.node()) { + if (node.op() == "TPUCompile" || node.op() == "TPUExecute" || + node.op() == "TPUPartitionedCall") { + return true; + } + } + return false; +} } // end namespace internal Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, @@ -377,26 +324,20 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // All the Const nodes, and their original devices in topological order. std::vector> const_nodes; - // Cache to map {op, device, port} -> bool on whether it is pinned to host. - internal::OpDevicePortOnHostMap op_device_outport_pinned_to_host_cache; - internal::OpDevicePortOnHostMap op_device_inport_pinned_to_host_cache; - for (auto& node : *optimized_graph->mutable_node()) { bool is_candidate = false; - TF_RETURN_IF_ERROR(internal::IsNodeHostCandidate( - graph, &properties, node, &op_device_outport_pinned_to_host_cache, - &is_candidate)); + TF_RETURN_IF_ERROR( + internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate)); if (!is_candidate) { continue; } - const string original_device = node.device(); - const bool swapped = internal::TrySwapToHostDevice(devices, has_device_cpu, - node.mutable_device()); - // Keep track of all Const nodes that we swapped. 
- if (swapped && IsConstant(node)) { - const_nodes.emplace_back(&node, original_device); + if (IsConstant(node)) { + const_nodes.emplace_back(&node, node.device()); } + // Try and swap the device to Host. + node.set_device( + internal::TryFindHostDevice(devices, has_device_cpu, node.device())); } // Traverse all `const_nodes`, and map them back to GPU greedily. @@ -408,9 +349,8 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // this node back onto the original device. for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) { // The consumer is not Host friendly, swap it back to the original device. - if (!internal::IsNodeInputPortHostFriendly( - *fanout.node, fanout.port_id, - &op_device_inport_pinned_to_host_cache)) { + if (!internal::IsNodeInputPortHostFriendly(*fanout.node, + fanout.port_id)) { node->set_device(device); break; } diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h index bed4a9ef95..d557a03463 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h @@ -26,8 +26,8 @@ namespace tensorflow { namespace grappler { namespace internal { // Try and find an appropriate Host device in `devices` given `device`. 
-bool TrySwapToHostDevice(const gtl::FlatSet& devices, - bool has_device_cpu, string* device); +string TryFindHostDevice(const gtl::FlatSet& devices, + bool has_device_cpu, const string& device); } // end namespace internal // Optimize TensorFlow ops that should be swapped into the CPU to avoid diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc index 9bb030b220..7c64529441 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc @@ -28,60 +28,30 @@ namespace { class PinToHostOptimizerTest : public GrapplerTest {}; -TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceNoDevices) { +TEST_F(PinToHostOptimizerTest, TryFindHostDevice) { gtl::FlatSet devices = {}; - - string device = "ABC"; - EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); - EXPECT_EQ(device, "ABC"); -} - -TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceCpuXlaGpu) { - gtl::FlatSet devices = {"/device:CPU:0", "/device:XLA_GPU:0"}; - - string device = ""; - EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device)); - EXPECT_EQ(device, "/device:CPU:0"); - - device = "/device:XLA_GPU:0"; - EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device)); - EXPECT_EQ(device, "/device:CPU:0"); - - device = "/device:XLA_GPU:*"; - EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device)); - EXPECT_EQ(device, "/device:CPU:0"); -} - -TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaCpuXlaGpu) { - gtl::FlatSet devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"}; - - string device = ""; - EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); - EXPECT_TRUE(device.empty()); - - device = "/device:XLA_GPU:0"; - EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device)); - EXPECT_EQ(device, "/device:XLA_CPU:0"); - - device = "/device:XLA_GPU:*"; - 
EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device)); - EXPECT_EQ(device, "/device:XLA_CPU:0"); -} - -TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaGpu) { - gtl::FlatSet devices = {"/device:XLA_GPU:0"}; - - string device = ""; - EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); - EXPECT_TRUE(device.empty()); - - device = "/device:XLA_GPU:0"; - EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); - EXPECT_EQ(device, "/device:XLA_GPU:0"); - - device = "/device:XLA_GPU:*"; - EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device)); - EXPECT_EQ(device, "/device:XLA_GPU:*"); + EXPECT_EQ("ABC", internal::TryFindHostDevice(devices, false, "ABC")); + + devices = {"/device:CPU:0", "/device:XLA_GPU:0"}; + EXPECT_EQ(internal::TryFindHostDevice(devices, true, ""), "/device:CPU:0"); + EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:0"), + "/device:CPU:0"); + EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:*"), + "/device:CPU:0"); + + devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"}; + EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), ""); + EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"), + "/device:XLA_CPU:0"); + EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"), + "/device:XLA_CPU:0"); + + devices = {"/device:XLA_GPU:0"}; + EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), ""); + EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"), + "/device:XLA_GPU:0"); + EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"), + "/device:XLA_GPU:*"); } TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) { -- GitLab From a198ca7d9bbc752a322c59b9a30519eab1b6730c Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 9 Oct 2018 00:56:23 -0700 Subject: [PATCH 106/411] Enable support for PRED values in KeyValueSort for the HloEvaluator. 
PiperOrigin-RevId: 216315110 --- tensorflow/compiler/xla/service/hlo_evaluator.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index eec8d242fa..6cba46135c 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -24,6 +24,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/string_view.h" #include "tensorflow/compiler/xla/index_util.h" @@ -1279,7 +1280,9 @@ StatusOr EvaluateSortInternal(HloInstruction* sort, return SafeLess(a.first, b.first); }); std::vector result_keys; - std::vector result_values; + // We use a InlinedVector here because we need to convert it to an + // absl::Span later, and this would not work with std::vector. + absl::InlinedVector result_values; for (const auto& key_value : key_value_vector) { result_keys.push_back(key_value.first); result_values.push_back(key_value.second); @@ -1316,6 +1319,9 @@ StatusOr EvaluateSortCurried(HloInstruction* sort, const Literal& keys_literal, const Literal& values_literal) { switch (sort->operand(1)->shape().element_type()) { + case PRED: + return EvaluateSortInternal(sort, keys_literal, + values_literal); case F32: return EvaluateSortInternal(sort, keys_literal, values_literal); -- GitLab From 69f60d4c8cb5edb6fdc63b837b6db29562d28744 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 9 Oct 2018 02:09:06 -0700 Subject: [PATCH 107/411] compat: Update forward compatibility horizon to 2018-10-09 PiperOrigin-RevId: 216323343 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 349c84e13c..0e14c0e044 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 8) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 9) @tf_export("compat.forward_compatible") -- GitLab From d5a7e27a1f7d2be65edc2b82c737d82ffe40ecde Mon Sep 17 00:00:00 2001 From: knight Date: Tue, 9 Oct 2018 19:10:43 +0800 Subject: [PATCH 108/411] improve contrib/kafka/python/kernel_tests/kafka_test.sh 1. add `docker pull` step 2. 
add some print message --- .../kafka/python/kernel_tests/kafka_test.sh | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh index adf027b8e7..def41c670f 100644 --- a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh +++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh @@ -22,23 +22,25 @@ if [ "$#" -ne 2 ]; then exit 1 fi +action=$1 container=$2 -if [ "$1" == "start" ]; then +if [ "$action" == "start" ]; then + echo "pull spotify/kafka" + docker pull spotify/kafka + echo "pull spotify/kafka successfully" docker run -d --rm --net=host --name=$container spotify/kafka - echo Wait 5 secs until kafka is up and running + echo "Wait 5 secs until kafka is up and running" sleep 5 - echo Create test topic + echo "Create test topic" docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test' - echo Create test message + echo "Create test message" docker exec $container bash -c 'echo -e "D0\nD1\nD2\nD3\nD4\nD5\nD6\nD7\nD8\nD9" > /test' - echo Produce test message + echo "Produce test message" docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-console-producer.sh --topic test --broker-list 127.0.0.1:9092 < /test' - - echo Container $container started successfully -elif [ "$1" == "stop" ]; then + echo "Container $container started successfully" +elif [ "$action" == "stop" ]; then docker rm -f $container - - echo Container $container stopped successfully + echo "Container $container removed successfully" else echo "Usage: $0 start|stop " >&2 exit 1 -- GitLab From e730b261f9028b2f3430461b82c30c86b9ece22f Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 9 Oct 2018 06:58:06 -0700 Subject: [PATCH 109/411] Automated rollback of commit 375c109659d2d0e6265447dffdeb460693b3cccf PiperOrigin-RevId: 216350134 --- tensorflow/compiler/xla/service/BUILD | 21 -- .../compiler/xla/service/buffer_assignment.cc | 34 ++-- .../compiler/xla/service/buffer_value.h | 3 - .../compiler/xla/service/copy_insertion.cc | 85 +------- .../xla/service/copy_insertion_test.cc | 183 ----------------- tensorflow/compiler/xla/service/hlo.proto | 29 --- .../xla/service/hlo_alias_analysis.cc | 46 +---- .../xla/service/hlo_alias_analysis_test.cc | 175 ----------------- .../xla/service/hlo_dataflow_analysis.cc | 2 +- .../service/hlo_input_output_alias_config.cc | 172 ---------------- .../service/hlo_input_output_alias_config.h | 101 ---------- .../hlo_input_output_alias_config_test.cc | 184 ------------------ tensorflow/compiler/xla/service/hlo_module.cc | 9 - tensorflow/compiler/xla/service/hlo_module.h | 14 -- .../compiler/xla/service/hlo_verifier.cc | 2 - tensorflow/compiler/xla/shape_util.h | 2 +- 16 files changed, 25 insertions(+), 1037 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.h delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 26ebb88e96..2b292ed053 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -294,7 +294,6 @@ cc_library( srcs = [ "dfs_hlo_visitor.cc", "hlo_computation.cc", - "hlo_input_output_alias_config.cc", "hlo_instruction.cc", "hlo_instructions.cc", "hlo_module.cc", @@ -309,7 +308,6 @@ cc_library( "hlo_clone_context.h", "hlo_computation.h", "hlo_domain_metadata.h", - "hlo_input_output_alias_config.h", "hlo_instruction.h", "hlo_instructions.h", "hlo_module.h", @@ -1270,25 +1268,6 @@ 
tf_cc_test( ], ) -tf_cc_test( - name = "hlo_input_output_alias_config_test", - srcs = ["hlo_input_output_alias_config_test.cc"], - deps = [ - ":hlo", - ":hlo_dce", - ":hlo_memory_scheduler", - ":hlo_ordering", - ":hlo_parser", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/core:test", - "@com_google_absl//absl/algorithm:container", - ], -) - cc_library( name = "hlo_memory_scheduler", srcs = ["hlo_memory_scheduler.cc"], diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index d5d6a044a8..2c2d1626c2 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -239,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice( void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset, int64 size) { - VLOG(4) << "Trying to add " << buffer << " to allocation #" << index(); + VLOG(4) << "Trying to add " << buffer << " to " << this; CHECK(assigned_buffers_.count(&buffer) == 0) << "LogicalBuffer " << buffer << " already assigned to allocation " << index_; @@ -784,6 +784,21 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, } } + if (allow_input_output_aliasing_ && allocation->maybe_live_out()) { + const HloComputation* entry_computation = + assignment->module_->entry_computation(); + for (auto param : entry_computation->parameter_instructions()) { + for (auto& param_buffer : + assignment->points_to_analysis().GetBuffersDefinedByInstruction( + param)) { + if (assignment->liveness().MayInterfere(*param_buffer, buffer)) { + VLOG(4) << "Can't assign: Parameter interference with result"; + return false; + } + } + } + } + // If the buffer is live out of the computation then it should only be // 
assigned a buffer which exactly fits the result to avoid wasting memory // (result buffers can have arbitrary lifetimes). @@ -1419,28 +1434,13 @@ BufferAssigner::MergeColocatedBufferSets( // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated // in the same allocation (currently just supports kWhile, kCall, and -// kConditional and input output aliasing). +// kConditional). void BufferAssigner::BuildColocatedBufferSets( const HloModule* module, const BufferLiveness& buffer_liveness, const LogicalBuffer::SizeFunction& buffer_size, std::vector* colocated_buffer_sets) { const TuplePointsToAnalysis& points_to_analysis = buffer_liveness.points_to_analysis(); - - // Set up colocated buffer set for input and output. - module->input_output_alias_config().ForEachAlias( - [&](const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index) { - std::vector colocated_set; - AddBufferToColocatedSet(module->entry_computation()->root_instruction(), - output_index, points_to_analysis, - &colocated_set); - AddBufferToColocatedSet( - module->entry_computation()->parameter_instruction(param_number), - param_index, points_to_analysis, &colocated_set); - AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets); - }); - for (const HloComputation* computation : module->MakeComputationPostOrder()) { if (computation->IsFusionComputation()) { continue; diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h index 11d8abc5ba..69b3646356 100644 --- a/tensorflow/compiler/xla/service/buffer_value.h +++ b/tensorflow/compiler/xla/service/buffer_value.h @@ -141,9 +141,6 @@ class BufferValue { // operator< is required for std::set. 
bool operator<(const BufferValue& other) const { return id_ < other.id_; } - bool operator==(const BufferValue& other) const { return id_ == other.id_; } - bool operator!=(const BufferValue& other) const { return id_ != other.id_; } - virtual string ToString() const = 0; // TODO(lauj) rename LogicalBufferProto to BufferValueProto. diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index cfe025fdd1..f35324aa35 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -40,12 +40,10 @@ namespace { using absl::StrAppend; -bool IsReadonlyEntryParameterValue(const HloValue& value) { +bool IsEntryParameterValue(const HloValue& value) { const HloComputation* computation = value.defining_instruction()->parent(); return value.defining_instruction()->opcode() == HloOpcode::kParameter && - computation == computation->parent()->entry_computation() && - !computation->parent()->input_output_alias_config().ParameterHasAlias( - value.defining_instruction()->parameter_number()); + computation == computation->parent()->entry_computation(); } bool IsConstantValue(const HloValue& value) { @@ -53,7 +51,7 @@ bool IsConstantValue(const HloValue& value) { } bool ValueIsReadOnly(const HloValue& value) { - return IsConstantValue(value) || IsReadonlyEntryParameterValue(value); + return IsConstantValue(value) || IsEntryParameterValue(value); } // Data structure describing the action which should be taken on parts of a @@ -334,81 +332,6 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis, return Status::OK(); } -// Conservatively adds copies before root instruction of entry computation and -// each aliased parameter to resolve interference of aliased input and output -// buffer. We later rely on the CopyRemover to drop the unnecessary ones. 
-Status AddCopiesForAliasedInputOutputs(HloModule* module) { - HloComputation* entry = module->entry_computation(); - HloInstruction* root = entry->root_instruction(); - - ShapeTree output_indices_to_copy(root->shape()); - std::vector> copied_parameters; - bool has_alias = false; - for (auto* param : entry->parameter_instructions()) { - bool param_has_alias = false; - ShapeTree param_indices_to_copy(param->shape()); - - module->input_output_alias_config().ForEachAlias( - [&](const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index) { - if (param_number == param->parameter_number()) { - param_has_alias = true; - *(param_indices_to_copy.mutable_element(param_index)) = true; - *(output_indices_to_copy.mutable_element(output_index)) = true; - } - }); - - if (!param_has_alias) { - continue; - } - - has_alias = true; - // Store a snapshot of users before DeepCopyInstruction, as - // DeepCopyInstruction introduces new users of the instruction. - std::vector users = param->users(); - ShapeTree param_copy_tree(param->shape(), - /*init_value=*/nullptr); - TF_ASSIGN_OR_RETURN(HloInstruction * copied, - entry->DeepCopyInstruction( - param, ¶m_indices_to_copy, ¶m_copy_tree)); - for (HloInstruction* user : users) { - TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied)); - } - - copied_parameters.push_back(param_copy_tree); - } - - if (!has_alias) { - return Status::OK(); - } - - // Add copies before root instruction. - ShapeTree output_copy_tree(root->shape(), - /*init_value=*/nullptr); - - TF_ASSIGN_OR_RETURN(HloInstruction * root_copied, - root->parent()->DeepCopyInstruction( - root, &output_indices_to_copy, &output_copy_tree)); - - // Add control dependencies between the input/output copies. 
- TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus( - [&](const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& input_index) -> Status { - HloInstruction* from = - copied_parameters[param_number].element(input_index); - HloInstruction* to = output_copy_tree.element(output_index); - - TF_RET_CHECK(from != nullptr); - TF_RET_CHECK(to != nullptr); - TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to)); - return Status::OK(); - })); - - entry->set_root_instruction(root_copied); - - return Status::OK(); -} - // Removes any control dependencies to or from the given instruction. Status StripControlDependenciesFrom(HloInstruction* instruction) { while (!instruction->control_successors().empty()) { @@ -1030,8 +953,6 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) { } } } - - TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module)); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index 3096206c34..892d0d7b54 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -1351,189 +1351,6 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) { EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy())); } -TEST_F(CopyInsertionTest, CrossingParameters) { - // Test a case where two parameters' dataflow cross with each other while - // input and output are aliased with same index: - // - // (p0 , p1) - // | \ /| - // | \ / | - // alias X alias - // | / \ | - // | / \| - // (p1 , p0) - auto module = CreateNewModule(); - const Shape tuple_shape = - ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); - - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "0")); - auto gte0 = builder.AddInstruction( - 
HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); - builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0})); - module->AddEntryComputation(builder.Build()); - ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( - /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); - ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( - /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); - InsertCopies(module.get()); - - EXPECT_EQ(CountCopies(*module), 4); -} - -TEST_F(CopyInsertionTest, ParametersAliasing) { - // Test a case where two parameters' dataflow don't interfere with each other - // while aliased. - // - // (p0 , p1) - // | | - // | | - // alias alias - // | | - // | | - // (p0 , p1) - auto module = CreateNewModule(); - const Shape tuple_shape = - ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); - - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "p0")); - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); - builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); - module->AddEntryComputation(builder.Build()); - ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( - /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); - ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( - /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); - InsertCopies(module.get()); - - EXPECT_THAT(module->entry_computation()->root_instruction(), - op::Tuple(op::Copy(op::GetTupleElement(param, 0)), - op::Copy(op::GetTupleElement(param, 1)))); - - EXPECT_EQ(CountCopies(*module), 2); -} - -TEST_F(CopyInsertionTest, 
ParameterWithPartialAliasing) { - // Test a case where one parameter is aliased with result while another one - // isn't. - // - // (p0 , p1) - // | | - // | | - // alias | - // | | - // | | - // (p0 , p1) - auto module = CreateNewModule(); - const Shape tuple_shape = - ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); - - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "p0")); - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); - builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); - module->AddEntryComputation(builder.Build()); - ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( - /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); - InsertCopies(module.get()); - - EXPECT_THAT(module->entry_computation()->root_instruction(), - op::Tuple(op::Copy(op::GetTupleElement(param, 0)), - op::Copy(op::GetTupleElement(param, 1)))); - - EXPECT_EQ(CountCopies(*module), 2); -} - -TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) { - // Test a case where one parameter is aliased with result while another one - // isn't. 
- // - // +-- (p0 , p1) - // | | | - // | | | - // alias Negate Negate - // | | | - // | | | - // +-- (p0 , p1) - auto module = CreateNewModule(); - const Shape tuple_shape = - ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); - - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "p0")); - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); - - auto negate0 = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); - - auto negate1 = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); - builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1})); - module->AddEntryComputation(builder.Build()); - ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( - /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); - InsertCopies(module.get()); - - EXPECT_EQ(CountCopies(*module), 0); -} - -TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) { - // Test a case where one parameter is aliased with result while another one - // isn't. 
- // - // +-- (p0 , p1) - // | | | - // | | | - // alias Negate Negate - // | | | - // | Add----+ - // | | | - // +-- (p0 , p1) - auto module = CreateNewModule(); - const Shape tuple_shape = - ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); - - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "p0")); - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); - - auto negate0 = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); - - auto negate1 = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); - - auto add = builder.AddInstruction(HloInstruction::CreateBinary( - scalar_shape_, HloOpcode::kAdd, negate0, negate1)); - builder.AddInstruction(HloInstruction::CreateTuple({add, negate1})); - module->AddEntryComputation(builder.Build()); - ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( - /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); - InsertCopies(module.get()); - - EXPECT_EQ(CountCopies(*module), 0); -} - TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) { // Test a while instruction with a body which permutes its tuple parameter // elements and applies one operation to one of the elements. 
The addition of diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 82c8fb1904..a0eb9e6ddc 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -225,32 +225,6 @@ message HloScheduleProto { map sequences = 1; } -message HloInputOutputAliasProto { - // The following proto describes a pair of aliased an input - // (described by parameter number and a ShapeIndex of the parameter) - // and an output (described by a ShapeIndex of the root - // instruction). For example: - // - // entry = { - // output_shape_index={1}, - // parameter_number=0, - // parameter_shape_index={1, 2}, - // } - // - // This entry indicates that the first paremter's {1, 2} element is - // aliased with the {1} element of the root instruction. - message AliasEntryProto { - // ShapeIndex of the root hlo. - repeated int64 output_shape_index = 1; - // Number of the parameter in entry computation. - int64 parameter_number = 2; - // ShapeIndex of the parameter instruction. - repeated int64 parameter_shape_index = 3; - } - - repeated AliasEntryProto entries = 1; -} - // Serialization of HloModule. message HloModuleProto { string name = 1; @@ -269,9 +243,6 @@ message HloModuleProto { // The schedule for this module. HloScheduleProto schedule = 7; - - // Describes alias information between inputs and outputs. - HloInputOutputAliasProto input_output_alias = 8; } // Serialization of LogicalBuffer. diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index cf8e6594cb..c3da12e273 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -59,9 +59,8 @@ class BufferValueMap { // construction process. 
using BufferNumber = int64; - explicit BufferValueMap(HloModule* module, - const HloDataflowAnalysis& dataflow) - : module_(module), dataflow_(dataflow) { + explicit BufferValueMap(const HloDataflowAnalysis& dataflow) + : dataflow_(dataflow) { buffers_.reserve(dataflow_.values().size()); value_to_buffer_number_.reserve(dataflow_.values().size()); for (const HloValue* value : dataflow_.values()) { @@ -172,42 +171,6 @@ class BufferValueMap { return value_to_buffer_number_.at(&value); } - void ComputeInputOutputAliasedBuffers( - const HloValue& value, std::vector* aliased_buffers) { - // Get parameter value from an aliased_input object. - const auto get_parameter_value = - [this](const std::pair& aliased_input) - -> const HloValue& { - int64 param_number = aliased_input.first; - const ShapeIndex& param_index = aliased_input.second; - return dataflow_.GetUniqueValueAt( - module_->entry_computation()->parameter_instruction(param_number), - param_index); - }; - - // If the value shows up in a root instruction, alias it with parameter - // intruction. - for (const HloPosition& pos : value.positions()) { - if (pos.instruction == module_->entry_computation()->root_instruction()) { - ShapeIndex output_index = pos.index; - - auto aliased_input = - module_->input_output_alias_config().GetAliasedParameter( - output_index); - if (aliased_input) { - aliased_buffers->push_back( - GetBufferForValue(get_parameter_value(*aliased_input))); - } - } - } - - // If the value is parameter instruction itself, alias it with itself. 
- if (value.instruction()->opcode() == HloOpcode::kParameter && - value.instruction()->parent() == module_->entry_computation()) { - aliased_buffers->push_back(GetBufferForValue(value)); - } - } - void ComputeWhileAliasedBuffers(const HloValue& value, std::vector* aliased_buffers) { VLOG(3) << "Compute kWhile aliases"; @@ -315,7 +278,6 @@ class BufferValueMap { VLOG(2) << "Use of value " << value.ToShortString() << ": " << use; } std::vector aliased_buffers; - ComputeInputOutputAliasedBuffers(value, &aliased_buffers); ComputeWhileAliasedBuffers(value, &aliased_buffers); ComputeConditionalAliasedBuffers(value, &aliased_buffers); // Uniquify aliased buffers. @@ -326,8 +288,6 @@ class BufferValueMap { return aliased_buffers; } - HloModule* module_; - // Dataflow analysis used to construct the buffer map. const HloDataflowAnalysis& dataflow_; @@ -501,7 +461,7 @@ StatusOr> HloAliasAnalysis::Run( /*bitcast_defines_value=*/false, fusion_can_share_buffer)); - BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis()); + BufferValueMap buffer_map(alias_analysis->dataflow_analysis()); buffer_map.MergeAliasedBuffers(); // Create a vector of HloBuffers, one for each set of values in the diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc index 5c8d97b2d1..0cd0ab36fc 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc @@ -217,181 +217,6 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) { EXPECT_FALSE(AnyValuesInSameBufferInterfere()); } -TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) { - const Shape tuple_shape = - ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); - - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "p0")); - auto gte0 = builder.AddInstruction( - 
HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); - - auto negate0 = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); - auto negate1 = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); - - auto tuple = - builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1})); - module_->AddEntryComputation(builder.Build()); - TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( - /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); - TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( - /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); - - // Cannot alias an output twice. - ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias( - /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0})); - - const HloAliasAnalysis& analysis = RunAnalysis(); - - EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), - analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); - - EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), - analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); -} - -TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) { - // parameter 0 aliased with output 1 and parameter 1 aliased with output 0. 
- // - // (p0 , p1) - // \ / - // \ / - // alias X - // / \ - // / \ - // (p0 , p1) - const Shape tuple_shape = - ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); - - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "p0")); - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); - auto tuple = - builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); - module_->AddEntryComputation(builder.Build()); - TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( - /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1})); - TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( - /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0})); - - // Cannot alias an output twice. - ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias( - /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); - - const HloAliasAnalysis& analysis = RunAnalysis(); - - // Every Ops in this graph are aliased with each other. - EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), - analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); - EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), - analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); - - EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), - analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); - EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), - analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); -} - -TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) { - // Test a simple single while instruction can be aliased with input and output - // of the computation. 
- // - // body((F32[], F32[]) %tuple_param): - // %add = Add(%tuple_param{0}, %tuple_param{1}) - // return Tuple(%tuple_param{0}, %add) - // - // condition((F32[], F32[]) %tuple_param): - // return Constant(false) - // - // entry: - // %param1 = param1 - // %while = While(%param1, body, condition) - // %while_1 = GTE(%while, 0) - // %while_2 = GTE(%while, 1) - // %negate_1 = Negate(%while_1) - // %negate_2 = Negate(%while_2) - // return Tuple(negate_1, negate_2) - // - const Shape tuple_shape = - ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); - - // Element 0 passes transparently through the body. - auto body_builder = HloComputation::Builder("body"); - auto body_param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "param")); - auto body_element_0 = body_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0)); - auto body_element_1 = body_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1)); - auto add = body_builder.AddInstruction(HloInstruction::CreateBinary( - scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1)); - auto body_tuple = body_builder.AddInstruction( - HloInstruction::CreateTuple({body_element_0, add})); - HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); - - // Condition computation trivially returns a constant "false". 
- auto cond_builder = HloComputation::Builder("condition"); - auto cond_param = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "param")); - cond_builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(false))); - HloComputation* condition = - module_->AddEmbeddedComputation(cond_builder.Build()); - - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "p0")); - - auto xla_while = builder.AddInstruction( - HloInstruction::CreateWhile(tuple_shape, condition, body, param)); - auto while_element_1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0)); - auto while_element_2 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1)); - auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary( - scalar_shape_, HloOpcode::kNegate, while_element_1)); - auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary( - scalar_shape_, HloOpcode::kNegate, while_element_2)); - auto tuple = - builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2})); - module_->AddEntryComputation(builder.Build()); - TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( - /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); - TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( - /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); - - const HloAliasAnalysis& analysis = RunAnalysis(); - - EXPECT_THAT( - GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})), - UnorderedElementsAre(GetValueDefinedAt(param, {1}), - GetValueDefinedAt(xla_while, /*index=*/{1}), - GetValueDefinedAt(body_param, {1}), - GetValueDefinedAt(cond_param, {1}), - GetValueDefinedAt(add), - GetValueDefinedAt(negate_2))); - - EXPECT_THAT( - analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(), - 
UnorderedElementsAre( - HloPosition{param, {1}}, HloPosition{xla_while, {1}}, - HloPosition{while_element_2, {}}, HloPosition{body_param, {1}}, - HloPosition{body_element_1, {}}, HloPosition{add, {}}, - HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}}, - HloPosition{cond_param, {1}}, HloPosition{negate_2, {}})); - - EXPECT_FALSE(AnyValuesInSameBufferInterfere()); -} - TEST_F(HloAliasAnalysisTest, SingleCall) { // Test a single call of a subcomputation. The subcomputation adds its two // array-shaped parameters. diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index f401eac016..c22adcdd8d 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -126,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction, const HloValue& HloDataflowAnalysis::GetValueDefinedAt( const HloInstruction* instruction, const ShapeIndex& index) const { - CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString(); + CHECK(ValueIsDefinedAt(instruction, index)); return GetUniqueValueAt(instruction, index); } diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc deleted file mode 100644 index 9ad98e5038..0000000000 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc +++ /dev/null @@ -1,172 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" -#include "tensorflow/compiler/xla/service/hlo_module.h" - -namespace xla { -Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, - int64 param_number, - const ShapeIndex& param_index) { - // Output can't be aliased with multiple parameters. - TF_RET_CHECK(!alias_.element(output_index)); - (*alias_.mutable_element(output_index)) = - std::make_pair(param_number, param_index); - return Status::OK(); -} - -HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const { - HloInputOutputAliasProto result; - alias_.ForEachElement( - [&](const ShapeIndex& index, - const absl::optional>& data) { - if (data) { - HloInputOutputAliasProto::AliasEntryProto entry; - for (int64 i : index) { - entry.add_output_shape_index(i); - } - entry.set_parameter_number(data->first); - for (int64 i : data->second) { - entry.add_parameter_shape_index(i); - } - result.add_entries()->Swap(&entry); - } - }); - return result; -} - -StatusOr HloInputOutputAliasConfig::CreateFromProto( - const HloModule* module, const HloInputOutputAliasProto& proto) { - HloInputOutputAliasConfig result( - module->entry_computation()->root_instruction()->shape()); - for (const HloInputOutputAliasProto::AliasEntryProto& entry : - proto.entries()) { - ShapeIndex output_index(entry.output_shape_index().begin(), - entry.output_shape_index().end()); - - int64 param_number = entry.parameter_number(); - ShapeIndex 
param_index(entry.parameter_shape_index().begin(), - entry.parameter_shape_index().end()); - TF_RETURN_IF_ERROR( - result.SetUpAlias(output_index, param_number, param_index)); - } - - return result; -} - -string HloInputOutputAliasConfig::ToString() const { - std::vector pieces; - pieces.push_back("HloInputOutputAliasConfig"); - - ForEachAlias([&](const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index) { - pieces.push_back(absl::StrFormat( - " OutputIndex %s is aliased with parameter %lld at %s:", - output_index.ToString(), param_number, param_index.ToString())); - }); - - return absl::StrJoin(pieces, "\n"); -} - -bool HloInputOutputAliasConfig::ParameterHasAlias(int64 param_number) const { - bool output = false; - alias_.ForEachElement( - [&](const xla::ShapeIndex&, - absl::optional> alias) { - if (alias && alias->first == param_number) { - output = true; - } - }); - return output; -} - -absl::optional HloInputOutputAliasConfig::GetAliasedOutput( - int64 param_number, const ShapeIndex& param_index) const { - absl::optional output; - alias_.ForEachElement( - [&](const xla::ShapeIndex& output_index, - absl::optional> alias) { - if (alias && alias->first == param_number && - alias->second == param_index) { - output = output_index; - } - }); - return output; -} - -absl::optional> -HloInputOutputAliasConfig::GetAliasedParameter( - const ShapeIndex& output_index) const { - CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)); - return alias_.element(output_index); -} - -void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const { - alias_.ForEachElement( - [&](const ShapeIndex& output_index, - absl::optional> aliased) { - if (aliased) { - fn(output_index, aliased->first, aliased->second); - } - }); -} - -Status HloInputOutputAliasConfig::ForEachAliasWithStatus( - AliasFnWithStatus fn) const { - return alias_.ForEachElementWithStatus( - [&](const ShapeIndex& output_index, - absl::optional> aliased) { - if (aliased) { - 
TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second)); - } - return Status::OK(); - }); -} - -Status HloInputOutputAliasConfig::Verify(const HloModule& module) const { - std::vector> param_has_seen; - const HloComputation* entry = module.entry_computation(); - for (int64 i = 0; i < entry->num_parameters(); ++i) { - HloInstruction* param = entry->parameter_instruction(i); - param_has_seen.emplace_back(param->shape()); - } - return ForEachAliasWithStatus([&](const ShapeIndex& output_index, - int64 param_number, - const ShapeIndex& param_index) -> Status { - const HloInstruction* root = entry->root_instruction(); - - const Shape& param_shape = - entry->parameter_instruction(param_number)->shape(); - const Shape& output_shape = root->shape(); - TF_RET_CHECK(entry->num_parameters() > param_number); - TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index)); - TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index)); - - // Check each param_number and param_index pair only show up once. No - // input can be aliased with output buffers. - TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false); - - *(param_has_seen[param_number].mutable_element(param_index)) = true; - - return Status::OK(); - }); -} - -std::ostream& operator<<(std::ostream& out, - const HloInputOutputAliasConfig& config) { - out << config.ToString(); - return out; -} -} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h deleted file mode 100644 index 02c46f65c8..0000000000 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ - -#include - -#include "absl/types/optional.h" -#include "tensorflow/compiler/xla/service/hlo.pb.h" -#include "tensorflow/compiler/xla/shape_tree.h" -#include "tensorflow/compiler/xla/shape_util.h" - -namespace xla { - -class HloModule; - -// This class specifies the alias map from output index to parameter number and -// parameter index in the entry computation. -class HloInputOutputAliasConfig { - public: - HloInputOutputAliasConfig() = default; - - explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {} - - virtual ~HloInputOutputAliasConfig() = default; - - // Sets up alias config from `output_index` to `param_index` at - // `param_number`. - Status SetUpAlias(const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index); - - // Returns true if the given parameter is aliased with one of the output - // buffers. - bool ParameterHasAlias(int64 param_number) const; - - // (De)Serializes an HloInputOutoutAliasConfig to/from an - // HloInputOutoutAliasProto. - HloInputOutputAliasProto ToProto() const; - - static StatusOr CreateFromProto( - const HloModule* module, const HloInputOutputAliasProto& proto); - - // Returns the output index that the given parameter and parameter index is - // aliased with. A nullopt is returned if there is no output that is aliased - // with the parameter number and index. 
- absl::optional GetAliasedOutput( - int64 param_number, const ShapeIndex& param_index) const; - - // Returns the number of parameter and index of the parameter buffer that the - // given output buffer index is aliased with. A nullopt is returned if there - // is no parameter is aliased with the specific output. - absl::optional> GetAliasedParameter( - const ShapeIndex& output_index) const; - - using AliasFn = - std::function; - - // Iterates through each aliased output and input. - void ForEachAlias(AliasFn fn) const; - - using AliasFnWithStatus = - std::function; - - // Verifies that the given config is valid for the given module. - // Specifically, the config's input and output should be in-bound and size of - // the aliased buffers should match. - Status Verify(const HloModule& module) const; - - Status ForEachAliasWithStatus(AliasFnWithStatus fn) const; - - string ToString() const; - - private: - // A ShapeTree which indicates the list of buffers that's expected to be - // aliased. The key on this shape tree represents the output index. The value - // is a pair of parameter number and index into the buffer. If the value is - // nullopt, it means there is no parameter aliasing for this output. - ShapeTree>> alias_; -}; - -std::ostream& operator<<(std::ostream& out, - const HloInputOutputAliasConfig& config); - -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc deleted file mode 100644 index 3b61ff04e6..0000000000 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" - -#include -#include - -#include "absl/algorithm/container.h" -#include "tensorflow/compiler/xla/service/hlo_computation.h" -#include "tensorflow/compiler/xla/service/hlo_dce.h" -#include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h" -#include "tensorflow/compiler/xla/service/hlo_opcode.h" -#include "tensorflow/compiler/xla/service/hlo_ordering.h" -#include "tensorflow/compiler/xla/service/hlo_parser.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/core/status_test_util.h" - -namespace xla { -namespace { -class HloInputOutputAliasConfigTest : public HloTestBase { - protected: - void expect_aliased(const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index, - const HloInputOutputAliasConfig& config) { - absl::optional aliased_output = - config.GetAliasedOutput(param_number, param_index); - - EXPECT_TRUE(aliased_output); - EXPECT_EQ(aliased_output.value(), output_index); - - absl::optional> aliased_param = - config.GetAliasedParameter(output_index); - - EXPECT_TRUE(aliased_param); - EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index)); - } - - void expect_not_aliased(const ShapeIndex& output_index, int64 
param_number, - const ShapeIndex& param_index, - const HloInputOutputAliasConfig& config) { - absl::optional aliased_output = - config.GetAliasedOutput(param_number, param_index); - - EXPECT_FALSE(aliased_output && aliased_output == output_index); - - absl::optional> aliased_param = - config.GetAliasedParameter(output_index); - - EXPECT_FALSE(aliased_param && aliased_param->first == param_number && - aliased_param->second == param_index); - } -}; - -TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) { - const string module_str = R"( -HloModule TEST - -ENTRY main { - a = f32[] parameter(0) - b = f32[] parameter(1) - ROOT root = (f32[], f32[]) tuple(%a, %b) -} -)"; - TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - ParseHloString(module_str)); - - HloInputOutputAliasConfig config( - module->entry_computation()->root_instruction()->shape()); - - TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1, - /*param_index=*/{})); - - expect_aliased(/*output_index=*/{0}, /*param_number=*/1, - /*param_index=*/{}, config); - - expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1, - /*param_index=*/{}, config); - - expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0, - /*param_index=*/{}, config); -} - -TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) { - const string module_str = R"( -HloModule TEST - -ENTRY main { - param = (f32[], f32[]) parameter(0) - gte1 = f32[] get-tuple-element(%param), index=0 - gte2 = f32[] get-tuple-element(%param), index=1 - ROOT root = (f32[], f32[]) tuple(%gte1, %gte2) -} -)"; - TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - ParseHloString(module_str)); - - HloInputOutputAliasConfig config( - module->entry_computation()->root_instruction()->shape()); - - TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, - /*param_index=*/{0})); - - TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0, - /*param_index=*/{1})); - - expect_aliased(/*output_index=*/{0}, 
/*param_number=*/0, - /*param_index=*/{0}, config); - - expect_aliased(/*output_index=*/{1}, /*param_number=*/0, - /*param_index=*/{1}, config); - - expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1, - /*param_index=*/{}, config); - - expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0, - /*param_index=*/{}, config); -} - -TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice) { - const string module_str = R"( -HloModule TEST - -ENTRY main { - a = f32[] parameter(0) - b = f32[] parameter(1) - ROOT root = (f32[], f32[]) tuple(%a, %b) -} -)"; - TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - ParseHloString(module_str)); - - HloInputOutputAliasConfig config( - module->entry_computation()->root_instruction()->shape()); - - TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, - /*param_index=*/{})); - - TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0, - /*param_index=*/{})); - - ASSERT_IS_NOT_OK(config.Verify(*module)); -} - -TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) { - const string module_str = R"( -HloModule TEST - -ENTRY main { - a = f32[] parameter(0) - b = f32[] parameter(1) - ROOT root = (f32[], f32[]) tuple(%a, %b) -} -)"; - TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - ParseHloString(module_str)); - - HloInputOutputAliasConfig config( - module->entry_computation()->root_instruction()->shape()); - - TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, - /*param_index=*/{})); - - ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1, - /*param_index=*/{})); -} -} // namespace -} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 547f74a0ed..93e04eb3db 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -73,8 +73,6 @@ HloComputation* HloModule::AddComputationInternal( 
config_.SetDefaultComputationLayout( entry_computation_->ComputeProgramShape()); } - input_output_alias_config_ = HloInputOutputAliasConfig( - entry_computation_->root_instruction()->shape()); } if (uniquify_identifiers) { @@ -254,9 +252,6 @@ HloModuleProto HloModule::ToProto() const { if (has_schedule()) { *proto.mutable_schedule() = schedule().ToProto().ValueOrDie(); } - - *proto.mutable_input_output_alias() = input_output_alias_config().ToProto(); - return proto; } @@ -333,10 +328,6 @@ StatusOr> HloModule::CreateFromProto( } TF_RET_CHECK(module->entry_computation_ != nullptr); - TF_ASSIGN_OR_RETURN(module->input_output_alias_config_, - HloInputOutputAliasConfig::CreateFromProto( - module.get(), proto.input_output_alias())); - // Because we didn't uniquify the names or the ids, double-check that the // instruction and computation names and ids are unique from the proto. absl::flat_hash_set computation_names; diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 9b9dc3ba9f..735804e827 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -31,7 +31,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" -#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/hlo_schedule.h" @@ -213,15 +212,6 @@ class HloModule { return result; } - // input_output_alias_config indicates the list of aliased buffers that are - // expected from the module. 
- HloInputOutputAliasConfig& input_output_alias_config() { - return input_output_alias_config_; - } - const HloInputOutputAliasConfig& input_output_alias_config() const { - return input_output_alias_config_; - } - // Returns the number of unique intruction ids given out. All ids up to // this point are guaranteed to be in the range [0..NumUniqueInstructionIds()) int NumUniqueInstructionIds() const { return next_unique_id_; } @@ -294,10 +284,6 @@ class HloModule { // sequential order of instructions for each non-fusion computation in the // module. absl::optional schedule_; - - // alias_config indicates the alias information of input/output buffers that - // are expected from the module. - HloInputOutputAliasConfig input_output_alias_config_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 2902a11a42..be3bee5975 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -1220,8 +1220,6 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(module->schedule().Verify()); } - TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module)); - return false; } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 51cedce7f0..73f541d505 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -72,7 +72,7 @@ class ShapeIndex { void push_back(int64 value) { indices_.push_back(value); } void pop_back() { indices_.pop_back(); } - // push_front is O(n), but shapes don't usually have a ton of dimensions. + // push_front is O(n^2), but shapes don't usually have a ton of dimensions. 
void push_front(int64 value) { indices_.insert(indices_.begin(), value); } using container_type = absl::InlinedVector; -- GitLab From ec82efd4ceb433e409ce518bd20c500076d79d10 Mon Sep 17 00:00:00 2001 From: Grzegorz Pawelczak Date: Tue, 9 Oct 2018 15:03:46 +0100 Subject: [PATCH 110/411] [XLA] Query whether to enable XLA support on MacOS with no as a default --- configure.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.py b/configure.py index 89dc79b6b6..08e3c17b18 100644 --- a/configure.py +++ b/configure.py @@ -1566,7 +1566,6 @@ def main(): if is_macos(): environ_cp['TF_NEED_TENSORRT'] = '0' - environ_cp['TF_ENABLE_XLA'] = '0' # The numpy package on ppc64le uses OpenBLAS which has multi-threading # issues that lead to incorrect answers. Set OMP_NUM_THREADS=1 at @@ -1577,8 +1576,9 @@ def main(): set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite', 'with_ignite_support', True, 'ignite') + xla_enabled_by_default = is_linux() set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', - True, 'xla') + xla_enabled_by_default, 'xla') set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': -- GitLab From a9a44b070bf639ee9bd60f0fd21157a297cd7f82 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 07:41:42 -0700 Subject: [PATCH 111/411] Removed unused load statements from the core BUILD. 
PiperOrigin-RevId: 216354906 --- tensorflow/core/BUILD | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 900a0e11c4..acea8e2217 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -73,12 +73,10 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", "cc_header_only_library", - "full_path", "if_android", "if_ios", "if_linux_x86_64", "if_mobile", - "if_not_mobile", "if_not_windows", "if_windows", "tf_cc_test", -- GitLab From a0ed9452d5c7f897e26788d8dca5164cb6fba023 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 07:54:16 -0700 Subject: [PATCH 112/411] Fixing Toco for exporting graphs with strings If the graph contains not constant array with strings it fails because the array's size can't be estimated. PiperOrigin-RevId: 216356162 --- tensorflow/contrib/lite/toco/tooling_util.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index e3f27e9e2a..083a96ad9d 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1237,11 +1237,15 @@ void DedupeConstantArrays(Model* model, size_t min_size) { lhs_array.final_data_type != ArrayDataType::kNone ? lhs_array.final_data_type : lhs_array.data_type; - size_t array_byte_size = - lhs_array.buffer->Length() * ElementSize(final_data_type); - if (array_byte_size < min_size) { - // Too small; skip. - continue; + // Ignore small arrays, don't check string arrays because it is not possible + // to estimate its size. + if (final_data_type != ArrayDataType::kString) { + size_t array_byte_size = + lhs_array.buffer->Length() * ElementSize(final_data_type); + if (array_byte_size < min_size) { + // Too small; skip. 
+ continue; + } } auto next_lhs_array_it = lhs_array_it; -- GitLab From cadcacc6224bcbb8a05bf3b70d625d9024a9c0f3 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 9 Oct 2018 08:16:49 -0700 Subject: [PATCH 113/411] Allowing for mixture of V1 and V2 feature columns usage in canned estimators. This is required for TF hub use cases where users might send in new feature columns to old model code. Implemented this support by making V2 feature columns support the V1 API. This is needed temporarily and would definitely be removed by TF 2.0, possibly earlier depending on what guarantees are provided by TF hub. The only case we don't allow here is mixing in V2 shared embedding columns with V1 Feature columns. V2 Shared FC's depend on a SharedEmbeddingState manager that would have to be passed in to the various API's and there wasn't really a very clean way to make that work. Mixing V2 feature columns with V1 shared embedding columns is fine though and along with all other combinations PiperOrigin-RevId: 216359041 --- .../canned/dnn_linear_combined_test.py | 107 +- .../estimator/canned/dnn_testing_utils.py | 109 + .../estimator/canned/linear_testing_utils.py | 64 + tensorflow/python/feature_column/BUILD | 1 + .../python/feature_column/feature_column.py | 4 + .../feature_column/feature_column_v2.py | 869 ++++- .../feature_column/feature_column_v2_test.py | 3294 ++++++++++++++--- 7 files changed, 3772 insertions(+), 676 deletions(-) diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py index ae968e717a..ab945d7b1a 100644 --- a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py +++ b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py @@ -317,16 +317,10 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase): writer_cache.FileWriterCache.clear() shutil.rmtree(self._model_dir) - def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, - 
input_dimension, label_dimension, batch_size, - fc_impl): - linear_feature_columns = [ - fc_impl.numeric_column('x', shape=(input_dimension,)) - ] - dnn_feature_columns = [ - fc_impl.numeric_column('x', shape=(input_dimension,)) - ] - feature_columns = linear_feature_columns + dnn_feature_columns + def _test_complete_flow_helper( + self, linear_feature_columns, dnn_feature_columns, feature_spec, + train_input_fn, eval_input_fn, predict_input_fn, input_dimension, + label_dimension, batch_size): est = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), @@ -351,14 +345,63 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase): self.assertAllEqual((batch_size, label_dimension), predictions.shape) # EXPORT - feature_spec = fc_impl.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir)) - def test_numpy_input_fn(self, fc_impl): + def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, + input_dimension, label_dimension, batch_size, + fc_impl): + linear_feature_columns = [ + fc_impl.numeric_column('x', shape=(input_dimension,)) + ] + dnn_feature_columns = [ + fc_impl.numeric_column('x', shape=(input_dimension,)) + ] + feature_columns = linear_feature_columns + dnn_feature_columns + feature_spec = fc_impl.make_parse_example_spec(feature_columns) + self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns, + feature_spec, train_input_fn, eval_input_fn, + predict_input_fn, input_dimension, + label_dimension, batch_size) + + def _test_complete_flow_mix1(self, train_input_fn, eval_input_fn, + predict_input_fn, input_dimension, + label_dimension, batch_size, fc_impl): + del fc_impl + linear_feature_columns = [ + feature_column.numeric_column('x', 
shape=(input_dimension,)) + ] + dnn_feature_columns = [ + feature_column_v2.numeric_column('x', shape=(input_dimension,)) + ] + feature_columns = linear_feature_columns + dnn_feature_columns + feature_spec = feature_column.make_parse_example_spec(feature_columns) + self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns, + feature_spec, train_input_fn, eval_input_fn, + predict_input_fn, input_dimension, + label_dimension, batch_size) + + def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn, + predict_input_fn, input_dimension, + label_dimension, batch_size, fc_impl): + del fc_impl + linear_feature_columns = [ + feature_column_v2.numeric_column('x', shape=(input_dimension,)) + ] + dnn_feature_columns = [ + feature_column.numeric_column('x', shape=(input_dimension,)) + ] + feature_columns = linear_feature_columns + dnn_feature_columns + feature_spec = feature_column.make_parse_example_spec(feature_columns) + self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns, + feature_spec, train_input_fn, eval_input_fn, + predict_input_fn, input_dimension, + label_dimension, batch_size) + + def _test_numpy_input_fn_helper(self, fc_impl, fn_to_run): """Tests complete flow with numpy_input_fn.""" label_dimension = 2 batch_size = 10 @@ -381,7 +424,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase): batch_size=batch_size, shuffle=False) - self._test_complete_flow( + fn_to_run( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, @@ -390,7 +433,16 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase): batch_size=batch_size, fc_impl=fc_impl) - def test_pandas_input_fn(self, fc_impl): + def test_numpy_input_fn_basic(self, fc_impl): + self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow) + + def test_numpy_input_fn_mix1(self, fc_impl): + self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix1) + + def test_numpy_input_fn_mix2(self, 
fc_impl): + self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix2) + + def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run): """Tests complete flow with pandas_input_fn.""" if not HAS_PANDAS: return @@ -415,7 +467,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase): batch_size=batch_size, shuffle=False) - self._test_complete_flow( + fn_to_run( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, @@ -424,7 +476,16 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase): batch_size=batch_size, fc_impl=fc_impl) - def test_input_fn_from_parse_example(self, fc_impl): + def test_pandas_input_fn_basic(self, fc_impl): + self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow) + + def test_pandas_input_fn_mix1(self, fc_impl): + self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix1) + + def test_pandas_input_fn_mix2(self, fc_impl): + self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix2) + + def _test_input_fn_from_parse_example_helper(self, fc_impl, fn_to_run): """Tests complete flow with input_fn constructed from parse_example.""" label_dimension = 2 batch_size = 10 @@ -466,7 +527,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase): features.pop('y') return features, None - self._test_complete_flow( + fn_to_run( train_input_fn=_train_input_fn, eval_input_fn=_eval_input_fn, predict_input_fn=_predict_input_fn, @@ -475,6 +536,18 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase): batch_size=batch_size, fc_impl=fc_impl) + def test_input_fn_from_parse_example_basic(self, fc_impl): + self._test_input_fn_from_parse_example_helper(fc_impl, + self._test_complete_flow) + + def test_input_fn_from_parse_example_mix1(self, fc_impl): + self._test_input_fn_from_parse_example_helper(fc_impl, + self._test_complete_flow_mix1) + + def test_input_fn_from_parse_example_mix2(self, fc_impl): + 
self._test_input_fn_from_parse_example_helper(fc_impl, + self._test_complete_flow_mix2) + # A function to mimic dnn-classifier init reuse same tests. def _dnn_classifier_fn(hidden_units, diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py index cd66d0a3bd..71d7e54783 100644 --- a/tensorflow/python/estimator/canned/dnn_testing_utils.py +++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py @@ -34,6 +34,7 @@ from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.estimator.canned import prediction_keys from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.feature_column import feature_column +from tensorflow.python.feature_column import feature_column_v2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -479,6 +480,60 @@ class BaseDNNModelFnTest(object): else: self.fail('Invalid mode: {}'.format(mode)) + def test_multi_feature_column_mix_multi_dim_logits(self): + """Tests multiple feature columns and multi-dimensional logits. + + All numbers are the same as test_multi_dim_input_multi_dim_logits. The only + difference is that the input consists of two 1D feature columns, instead of + one 2D feature column. 
+ """ + base_global_step = 100 + create_checkpoint(( + ([[.6, .5], [-.6, -.5]], [.1, -.1]), + ([[1., .8], [-.8, -1.]], [.2, -.2]), + ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), + ), base_global_step, self._model_dir) + hidden_units = (2, 2) + logits_dimension = 3 + inputs = ([[10.]], [[8.]]) + expected_logits = [[-0.48, 0.48, 0.39]] + + for mode in [ + model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL, + model_fn.ModeKeys.PREDICT + ]: + with ops.Graph().as_default(): + training_util.create_global_step() + head = mock_head( + self, + hidden_units=hidden_units, + logits_dimension=logits_dimension, + expected_logits=expected_logits) + estimator_spec = self._dnn_model_fn( + features={ + 'age': constant_op.constant(inputs[0]), + 'height': constant_op.constant(inputs[1]) + }, + labels=constant_op.constant([[1]]), + mode=mode, + head=head, + hidden_units=hidden_units, + feature_columns=[ + feature_column.numeric_column('age'), + feature_column_v2.numeric_column('height') + ], + optimizer=mock_optimizer(self, hidden_units)) + with monitored_session.MonitoredTrainingSession( + checkpoint_dir=self._model_dir) as sess: + if mode == model_fn.ModeKeys.TRAIN: + sess.run(estimator_spec.train_op) + elif mode == model_fn.ModeKeys.EVAL: + sess.run(estimator_spec.loss) + elif mode == model_fn.ModeKeys.PREDICT: + sess.run(estimator_spec.predictions) + else: + self.fail('Invalid mode: {}'.format(mode)) + def test_features_tensor_raises_value_error(self): """Tests that passing a Tensor for features raises a ValueError.""" hidden_units = (2, 2) @@ -806,6 +861,60 @@ class BaseDNNLogitFnTest(object): checkpoint_dir=self._model_dir) as sess: self.assertAllClose(expected_logits, sess.run(logits)) + def test_multi_feature_column_mix_multi_dim_logits(self): + """Tests multiple feature columns and multi-dimensional logits. + + All numbers are the same as test_multi_dim_input_multi_dim_logits. 
The only + difference is that the input consists of two 1D feature columns, instead of + one 2D feature column. + """ + base_global_step = 100 + create_checkpoint(( + ([[.6, .5], [-.6, -.5]], [.1, -.1]), + ([[1., .8], [-.8, -1.]], [.2, -.2]), + ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), + ), base_global_step, self._model_dir) + + hidden_units = (2, 2) + logits_dimension = 3 + inputs = ([[10.]], [[8.]]) + expected_logits = [[-0.48, 0.48, 0.39]] + + for mode in [ + model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL, + model_fn.ModeKeys.PREDICT + ]: + with ops.Graph().as_default(): + # Global step needed for MonitoredSession, which is in turn used to + # explicitly set variable weights through a checkpoint. + training_util.create_global_step() + # Use a variable scope here with 'dnn', emulating the dnn model_fn, so + # the checkpoint naming is shared. + with variable_scope.variable_scope('dnn'): + input_layer_partitioner = ( + partitioned_variables.min_max_variable_partitioner( + max_partitions=0, min_slice_size=64 << 20)) + logit_fn = self._dnn_logit_fn_builder( + units=logits_dimension, + hidden_units=hidden_units, + feature_columns=[ + feature_column.numeric_column('age'), + feature_column_v2.numeric_column('height') + ], + activation_fn=nn.relu, + dropout=None, + input_layer_partitioner=input_layer_partitioner, + batch_norm=False) + logits = logit_fn( + features={ + 'age': constant_op.constant(inputs[0]), + 'height': constant_op.constant(inputs[1]) + }, + mode=mode) + with monitored_session.MonitoredTrainingSession( + checkpoint_dir=self._model_dir) as sess: + self.assertAllClose(expected_logits, sess.run(logits)) + class BaseDNNWarmStartingTest(object): diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py index 827352a70b..2cfa2a8e15 100644 --- a/tensorflow/python/estimator/canned/linear_testing_utils.py +++ b/tensorflow/python/estimator/canned/linear_testing_utils.py @@ -400,6 
+400,45 @@ class BaseLinearRegressorEvaluationTest(object): # [213.0, 421.0], while label is [213., 421.]. Loss = 0. self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS]) + def test_evaluation_for_multiple_feature_columns_mix(self): + with ops.Graph().as_default(): + variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME) + variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME) + variables_lib.Variable([5.0], name=BIAS_NAME) + variables_lib.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + batch_size = 2 + feature_columns = [ + feature_column.numeric_column('age'), + feature_column_v2.numeric_column('height') + ] + + def _input_fn(): + features_ds = dataset_ops.Dataset.from_tensor_slices({ + 'age': np.array([20, 40]), + 'height': np.array([4, 8]) + }) + labels_ds = dataset_ops.Dataset.from_tensor_slices( + np.array([[213.], [421.]])) + return (dataset_ops.Dataset.zip((features_ds, labels_ds)) + .batch(batch_size).repeat(None)) + + est = self._linear_regressor_fn( + feature_columns=feature_columns, model_dir=self._model_dir) + + eval_metrics = est.evaluate(input_fn=_input_fn, steps=1) + self.assertItemsEqual( + (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN, + metric_keys.MetricKeys.PREDICTION_MEAN, + metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP), + eval_metrics.keys()) + + # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] = + # [213.0, 421.0], while label is [213., 421.]. Loss = 0. + self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS]) + class BaseLinearRegressorPredictTest(object): @@ -497,6 +536,31 @@ class BaseLinearRegressorPredictTest(object): # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. 
* 20 + .2 = 80.2 self.assertAllClose([[80.2]], predicted_scores) + def testTwoFeatureColumnsMix(self): + """Tests predict with two feature columns.""" + with ops.Graph().as_default(): + variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights') + variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights') + variables_lib.Variable([.2], name=BIAS_NAME) + variables_lib.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + linear_regressor = self._linear_regressor_fn( + feature_columns=(feature_column.numeric_column('x0'), + feature_column_v2.numeric_column('x1')), + model_dir=self._model_dir) + + def _predict_input_fn(): + return dataset_ops.Dataset.from_tensor_slices({ + 'x0': np.array([[2.]]), + 'x1': np.array([[3.]]) + }).batch(1) + + predictions = linear_regressor.predict(input_fn=_predict_input_fn) + predicted_scores = list([x['predictions'] for x in predictions]) + # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. 
* 20 + .2 = 80.2 + self.assertAllClose([[80.2]], predicted_scores) + def testSparseCombiner(self): w_a = 2.0 w_b = 3.0 diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index ac53a84eef..82acde584e 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -54,6 +54,7 @@ py_library( srcs = ["feature_column_v2.py"], srcs_version = "PY2AND3", deps = [ + ":feature_column", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", "//tensorflow/python:control_flow_ops", diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 28a8286544..8a11ca142c 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -121,6 +121,10 @@ Example of building model using FeatureColumns, this can be used in a NOTE: Functions prefixed with "_" indicate experimental or private parts of the API subject to change, and should not be relied upon! + +NOTE: The new feature columns are being developed in feature_column_v2.py and +are a somewhat duplicate of the code here. Please make sure to update logic +in both places. 
""" from __future__ import absolute_import diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index b79373c475..6d089de991 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -136,6 +136,7 @@ import six from tensorflow.python.eager import context +from tensorflow.python.feature_column import feature_column as fc_old from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib @@ -157,9 +158,16 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpoint_utils +from tensorflow.python.util import deprecation from tensorflow.python.util import nest +_FEATURE_COLUMN_DEPRECATION_DATE = '2018-11-30' +_FEATURE_COLUMN_DEPRECATION = ('The old _FeatureColumn APIs are being ' + 'deprecated. Please use the new FeatureColumn ' + 'APIs instead.') + + class StateManager(object): """Manages the state associated with FeatureColumns. @@ -440,10 +448,6 @@ class FeatureLayer(Layer): return (input_shape[0], total_elements) -def _strip_leading_slashes(name): - return name.rsplit('/', 1)[-1] - - class LinearModel(Layer): """Produces a linear prediction `Tensor` based on given `feature_columns`. @@ -775,12 +779,12 @@ def embedding_column( categorical_column, dimension, combiner='mean', initializer=None, ckpt_to_load_from=None, tensor_name_in_ckpt=None, max_norm=None, trainable=True): - """`_DenseColumn` that converts from sparse, categorical input. + """`DenseColumn` that converts from sparse, categorical input. Use this when your inputs are sparse, but you want to convert them to a dense representation (e.g., to feed to a DNN). 
- Inputs must be a `_CategoricalColumn` created by any of the + Inputs must be a `CategoricalColumn` created by any of the `categorical_column_*` function. Here is an example of using `embedding_column` with `DNNClassifier`: @@ -814,12 +818,12 @@ def embedding_column( ``` Args: - categorical_column: A `_CategoricalColumn` created by a + categorical_column: A `CategoricalColumn` created by a `categorical_column_with_*` function. This column produces the sparse IDs that are inputs to the embedding lookup. dimension: An integer specifying dimension of the embedding, must be > 0. - combiner: A string specifying how to reduce if there are multiple entries - in a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with + combiner: A string specifying how to reduce if there are multiple entries in + a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with 'mean' the default. 'sqrtn' often achieves good accuracy, in particular with bag-of-words columns. Each of this can be thought as example level normalizations on the column. For more information, see @@ -830,14 +834,14 @@ def embedding_column( `1/sqrt(dimension)`. ckpt_to_load_from: String representing checkpoint name/pattern from which to restore column weights. Required if `tensor_name_in_ckpt` is not `None`. - tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from - which to restore the column weights. Required if `ckpt_to_load_from` is - not `None`. + tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from which + to restore the column weights. Required if `ckpt_to_load_from` is not + `None`. max_norm: If not `None`, embedding values are l2-normalized to this value. trainable: Whether or not the embedding is trainable. Default is True. Returns: - `_DenseColumn` that converts from sparse input. + `DenseColumn` that converts from sparse input. Raises: ValueError: if `dimension` not > 0. 
@@ -1181,7 +1185,7 @@ def bucketized_column(source_column, boundaries): one-dimensional. ValueError: If `boundaries` is not a sorted list or tuple. """ - if not isinstance(source_column, NumericColumn): + if not isinstance(source_column, (NumericColumn, fc_old._NumericColumn)): # pylint: disable=protected-access raise ValueError( 'source_column must be a column generated with numeric_column(). ' 'Given: {}'.format(source_column)) @@ -1390,7 +1394,7 @@ def categorical_column_with_vocabulary_file(key, def categorical_column_with_vocabulary_list( key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0): - """A `_CategoricalColumn` with in-memory vocabulary. + """A `CategoricalColumn` with in-memory vocabulary. Use this when your inputs are in string or integer format, and you have an in-memory vocabulary mapping each value to an integer ID. By default, @@ -1439,14 +1443,14 @@ def categorical_column_with_vocabulary_list( ``` Args: - key: A unique string identifying the input feature. It is used as the - column name and the dictionary key for feature parsing configs, feature - `Tensor` objects, and feature columns. + key: A unique string identifying the input feature. It is used as the column + name and the dictionary key for feature parsing configs, feature `Tensor` + objects, and feature columns. vocabulary_list: An ordered iterable defining the vocabulary. Each feature is mapped to the index of its value (if present) in `vocabulary_list`. Must be castable to `dtype`. - dtype: The type of features. Only string and integer types are supported. - If `None`, it will be inferred from `vocabulary_list`. + dtype: The type of features. Only string and integer types are supported. If + `None`, it will be inferred from `vocabulary_list`. default_value: The integer ID value to return for out-of-vocabulary feature values, defaults to `-1`. This can not be specified with a positive `num_oov_buckets`. 
@@ -1604,7 +1608,7 @@ def indicator_column(categorical_column): def weighted_categorical_column( categorical_column, weight_feature_key, dtype=dtypes.float32): - """Applies weight values to a `_CategoricalColumn`. + """Applies weight values to a `CategoricalColumn`. Use this when each of your sparse inputs has both an ID and a value. For example, if you're representing text documents as a collection of word @@ -1655,7 +1659,7 @@ def weighted_categorical_column( the same indices and dense shape. Args: - categorical_column: A `_CategoricalColumn` created by + categorical_column: A `CategoricalColumn` created by `categorical_column_with_*` functions. weight_feature_key: String key for weight values. dtype: Type of weights, such as `tf.float32`. Only float and integer weights @@ -1788,12 +1792,13 @@ def crossed_column(keys, hash_bucket_size, hash_key=None): 'keys must be a list with length > 1. Given: {}'.format(keys)) for key in keys: if (not isinstance(key, six.string_types) and - not isinstance(key, CategoricalColumn)): + not isinstance(key, (CategoricalColumn, fc_old._CategoricalColumn))): # pylint: disable=protected-access raise ValueError( 'Unsupported key type. All keys must be either string, or ' 'categorical column except HashedCategoricalColumn. ' 'Given: {}'.format(key)) - if isinstance(key, HashedCategoricalColumn): + if isinstance(key, + (HashedCategoricalColumn, fc_old._HashedCategoricalColumn)): # pylint: disable=protected-access raise ValueError( 'categorical_column_with_hash_bucket is not supported for crossing. ' 'Hashing before crossing will increase probability of collision. ' @@ -1882,6 +1887,16 @@ class FeatureColumn(object): """ pass + @abc.abstractproperty + def _is_v2_column(self): + """Returns whether this FeatureColumn is fully conformant to the new API. + + This is needed for composition type cases where an EmbeddingColumn etc. + might take in old categorical columns as input and then we want to use the + old API. 
+ """ + pass + class DenseColumn(FeatureColumn): """Represents a column which can be represented as `Tensor`. @@ -1927,6 +1942,8 @@ def is_feature_column_v2(feature_columns): for feature_column in feature_columns: if not isinstance(feature_column, FeatureColumn): return False + if not feature_column._is_v2_column: # pylint: disable=protected-access + return False return True @@ -2201,19 +2218,6 @@ class FeatureTransformationCache(object): lambda: feature_tensor) -# TODO(ptucker): Move to third_party/tensorflow/python/ops/sparse_ops.py -def _shape_offsets(shape): - """Returns moving offset for each dimension given shape.""" - offsets = [] - for dim in reversed(shape): - if offsets: - offsets.append(dim * offsets[-1]) - else: - offsets.append(dim) - offsets.reverse() - return offsets - - # TODO(ptucker): Move to third_party/tensorflow/python/ops/sparse_ops.py def _to_sparse_input_and_drop_ignore_values(input_tensor, ignore_value=None): """Converts a `Tensor` to a `SparseTensor`, dropping ignore_value cells. @@ -2306,11 +2310,16 @@ def _normalize_feature_columns(feature_columns): class NumericColumn( DenseColumn, + fc_old._DenseColumn, # pylint: disable=protected-access collections.namedtuple( 'NumericColumn', ('key', 'shape', 'default_value', 'dtype', 'normalizer_fn'))): """see `numeric_column`.""" + @property + def _is_v2_column(self): + return True + @property def name(self): """See `FeatureColumn` base class.""" @@ -2325,6 +2334,27 @@ class NumericColumn( self.default_value) } + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.parse_example_spec + + def _transform_input_tensor(self, input_tensor): + if isinstance(input_tensor, sparse_tensor_lib.SparseTensor): + raise ValueError( + 'The corresponding Tensor of numerical column must be a Tensor. ' + 'SparseTensor is not supported. 
key: {}'.format(self.key)) + if self.normalizer_fn is not None: + input_tensor = self.normalizer_fn(input_tensor) + return math_ops.to_float(input_tensor) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + input_tensor = inputs.get(self.key) + return self._transform_input_tensor(input_tensor) + def transform_feature(self, transformation_cache, state_manager): """See `FeatureColumn` base class. @@ -2342,19 +2372,19 @@ class NumericColumn( ValueError: If a SparseTensor is passed in. """ input_tensor = transformation_cache.get(self.key, state_manager) - if isinstance(input_tensor, sparse_tensor_lib.SparseTensor): - raise ValueError( - 'The corresponding Tensor of numerical column must be a Tensor. ' - 'SparseTensor is not supported. key: {}'.format(self.key)) - if self.normalizer_fn is not None: - input_tensor = self.normalizer_fn(input_tensor) - return math_ops.to_float(input_tensor) + return self._transform_input_tensor(input_tensor) @property def variable_shape(self): """See `DenseColumn` base class.""" return tensor_shape.TensorShape(self.shape) + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _variable_shape(self): + return self.variable_shape + def get_dense_tensor(self, transformation_cache, state_manager): """Returns dense `Tensor` representing numeric feature. @@ -2371,12 +2401,28 @@ class NumericColumn( # representation created by _transform_feature. 
return transformation_cache.get(self, state_manager) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): + del weight_collections + del trainable + return inputs.get(self) + -class BucketizedColumn(DenseColumn, CategoricalColumn, - collections.namedtuple('BucketizedColumn', - ('source_column', 'boundaries'))): +class BucketizedColumn( + DenseColumn, + CategoricalColumn, + fc_old._DenseColumn, # pylint: disable=protected-access + fc_old._CategoricalColumn, # pylint: disable=protected-access + collections.namedtuple('BucketizedColumn', + ('source_column', 'boundaries'))): """See `bucketized_column`.""" + @property + def _is_v2_column(self): + return (isinstance(self.source_column, FeatureColumn) and + self.source_column._is_v2_column) # pylint: disable=protected-access + @property def name(self): """See `FeatureColumn` base class.""" @@ -2387,6 +2433,21 @@ class BucketizedColumn(DenseColumn, CategoricalColumn, """See `FeatureColumn` base class.""" return self.source_column.parse_example_spec + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.source_column._parse_example_spec # pylint: disable=protected-access + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + """Returns bucketized categorical `source_column` tensor.""" + source_tensor = inputs.get(self.source_column) + return math_ops._bucketize( # pylint: disable=protected-access + source_tensor, + boundaries=self.boundaries) + def transform_feature(self, transformation_cache, state_manager): """Returns bucketized categorical `source_column` tensor.""" source_tensor = transformation_cache.get(self.source_column, state_manager) @@ -2400,24 +2461,45 @@ class BucketizedColumn(DenseColumn, CategoricalColumn, return 
tensor_shape.TensorShape( tuple(self.source_column.shape) + (len(self.boundaries) + 1,)) - def get_dense_tensor(self, transformation_cache, state_manager): - """Returns one hot encoded dense `Tensor`.""" - input_tensor = transformation_cache.get(self, state_manager) + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _variable_shape(self): + return self.variable_shape + + def _get_dense_tensor_for_input_tensor(self, input_tensor): return array_ops.one_hot( indices=math_ops.to_int64(input_tensor), depth=len(self.boundaries) + 1, on_value=1., off_value=0.) + def get_dense_tensor(self, transformation_cache, state_manager): + """Returns one hot encoded dense `Tensor`.""" + input_tensor = transformation_cache.get(self, state_manager) + return self._get_dense_tensor_for_input_tensor(input_tensor) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): + del weight_collections + del trainable + input_tensor = inputs.get(self) + return self._get_dense_tensor_for_input_tensor(input_tensor) + @property def num_buckets(self): """See `CategoricalColumn` base class.""" # By construction, source_column is always one-dimensional. return (len(self.boundaries) + 1) * self.source_column.shape[0] - def get_sparse_tensors(self, transformation_cache, state_manager): - """Converts dense inputs to SparseTensor so downstream code can use it.""" - input_tensor = transformation_cache.get(self, state_manager) + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _num_buckets(self): + return self.num_buckets + + def _get_sparse_tensors_for_input_tensor(self, input_tensor): batch_size = array_ops.shape(input_tensor)[0] # By construction, source_column is always one-dimensional. 
source_dimension = self.source_column.shape[0] @@ -2443,15 +2525,38 @@ class BucketizedColumn(DenseColumn, CategoricalColumn, dense_shape=dense_shape) return CategoricalColumn.IdWeightPair(sparse_tensor, None) + def get_sparse_tensors(self, transformation_cache, state_manager): + """Converts dense inputs to SparseTensor so downstream code can use it.""" + input_tensor = transformation_cache.get(self, state_manager) + return self._get_sparse_tensors_for_input_tensor(input_tensor) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + """Converts dense inputs to SparseTensor so downstream code can use it.""" + del weight_collections + del trainable + input_tensor = inputs.get(self) + return self._get_sparse_tensors_for_input_tensor(input_tensor) + class EmbeddingColumn( - DenseColumn, SequenceDenseColumn, + DenseColumn, + SequenceDenseColumn, + fc_old._DenseColumn, # pylint: disable=protected-access + fc_old._SequenceDenseColumn, # pylint: disable=protected-access collections.namedtuple( 'EmbeddingColumn', ('categorical_column', 'dimension', 'combiner', 'initializer', 'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))): """See `embedding_column`.""" + @property + def _is_v2_column(self): + return (isinstance(self.categorical_column, FeatureColumn) and + self.categorical_column._is_v2_column) # pylint: disable=protected-access + @property def name(self): """See `FeatureColumn` base class.""" @@ -2462,18 +2567,35 @@ class EmbeddingColumn( """See `FeatureColumn` base class.""" return self.categorical_column.parse_example_spec + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.categorical_column._parse_example_spec # pylint: disable=protected-access + def transform_feature(self, transformation_cache, state_manager): """Transforms 
underlying `categorical_column`.""" return transformation_cache.get(self.categorical_column, state_manager) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + return inputs.get(self.categorical_column) + @property def variable_shape(self): """See `DenseColumn` base class.""" return tensor_shape.vector(self.dimension) + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _variable_shape(self): + return self.variable_shape + def create_state(self, state_manager): """Creates the embedding lookup variable.""" - embedding_shape = (self.categorical_column.num_buckets, self.dimension) + embedding_shape = (self.categorical_column._num_buckets, self.dimension) # pylint: disable=protected-access state_manager.create_variable( self, name='embedding_weights', @@ -2482,17 +2604,11 @@ class EmbeddingColumn( trainable=self.trainable, initializer=self.initializer) - def _get_dense_tensor_internal(self, transformation_cache, state_manager): - """Private method that follows the signature of _get_dense_tensor.""" - # Get sparse IDs and weights. 
- sparse_tensors = self.categorical_column.get_sparse_tensors( - transformation_cache, state_manager) + def _get_dense_tensor_internal_helper(self, sparse_tensors, + embedding_weights): sparse_ids = sparse_tensors.id_tensor sparse_weights = sparse_tensors.weight_tensor - embedding_weights = state_manager.get_variable( - self, name='embedding_weights') - if self.ckpt_to_load_from is not None: to_restore = embedding_weights if isinstance(to_restore, variables.PartitionedVariable): @@ -2510,6 +2626,30 @@ class EmbeddingColumn( name='%s_weights' % self.name, max_norm=self.max_norm) + def _get_dense_tensor_internal(self, sparse_tensors, state_manager): + """Private method that follows the signature of get_dense_tensor.""" + embedding_weights = state_manager.get_variable( + self, name='embedding_weights') + return self._get_dense_tensor_internal_helper(sparse_tensors, + embedding_weights) + + def _old_get_dense_tensor_internal(self, sparse_tensors, weight_collections, + trainable): + """Private method that follows the signature of _get_dense_tensor.""" + embedding_shape = (self.categorical_column._num_buckets, self.dimension) # pylint: disable=protected-access + if (weight_collections and + ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections): + weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES) + embedding_weights = variable_scope.get_variable( + name='embedding_weights', + shape=embedding_shape, + dtype=dtypes.float32, + initializer=self.initializer, + trainable=self.trainable and trainable, + collections=weight_collections) + return self._get_dense_tensor_internal_helper(sparse_tensors, + embedding_weights) + def get_dense_tensor(self, transformation_cache, state_manager): """Returns tensor after doing the embedding lookup. @@ -2535,7 +2675,30 @@ class EmbeddingColumn( 'sequence_input_layer instead of input_layer. 
' 'Given (type {}): {}'.format(self.name, type(self.categorical_column), self.categorical_column)) - return self._get_dense_tensor_internal(transformation_cache, state_manager) + # Get sparse IDs and weights. + sparse_tensors = self.categorical_column.get_sparse_tensors( + transformation_cache, state_manager) + return self._get_dense_tensor_internal(sparse_tensors, state_manager) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): + if isinstance( + self.categorical_column, + (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)): # pylint: disable=protected-access + raise ValueError( + 'In embedding_column: {}. ' + 'categorical_column must not be of type _SequenceCategoricalColumn. ' + 'Suggested fix A: If you wish to use input_layer, use a ' + 'non-sequence categorical_column_with_*. ' + 'Suggested fix B: If you wish to create sequence input, use ' + 'sequence_input_layer instead of input_layer. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) + sparse_tensors = self.categorical_column._get_sparse_tensors( # pylint: disable=protected-access + inputs, weight_collections, trainable) + return self._old_get_dense_tensor_internal(sparse_tensors, + weight_collections, trainable) def get_sequence_dense_tensor(self, transformation_cache, state_manager): """See `SequenceDenseColumn` base class.""" @@ -2547,21 +2710,40 @@ class EmbeddingColumn( 'Suggested fix: Use one of sequence_categorical_column_with_*. 
' 'Given (type {}): {}'.format(self.name, type(self.categorical_column), self.categorical_column)) - dense_tensor = self._get_dense_tensor_internal( # pylint: disable=protected-access + sparse_tensors = self.categorical_column.get_sequence_sparse_tensors( transformation_cache, state_manager) - sparse_tensors = self.categorical_column.get_sparse_tensors( - transformation_cache, state_manager) - sequence_length = _sequence_length_from_sparse_tensor( + dense_tensor = self._get_dense_tensor_internal(sparse_tensors, + state_manager) + sequence_length = fc_old._sequence_length_from_sparse_tensor( # pylint: disable=protected-access sparse_tensors.id_tensor) return SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=dense_tensor, sequence_length=sequence_length) - -def _get_graph_for_variable(var): - if isinstance(var, variables.PartitionedVariable): - return list(var)[0].graph - else: - return var.graph + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_sequence_dense_tensor(self, + inputs, + weight_collections=None, + trainable=None): + if not isinstance( + self.categorical_column, + (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)): # pylint: disable=protected-access + raise ValueError( + 'In embedding_column: {}. ' + 'categorical_column must be of type _SequenceCategoricalColumn ' + 'to use sequence_input_layer. ' + 'Suggested fix: Use one of sequence_categorical_column_with_*. 
' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access + dense_tensor = self._old_get_dense_tensor_internal( + sparse_tensors, + weight_collections=weight_collections, + trainable=trainable) + sequence_length = fc_old._sequence_length_from_sparse_tensor( # pylint: disable=protected-access + sparse_tensors.id_tensor) + return SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) class SharedEmbeddingStateManager(Layer): @@ -2633,8 +2815,17 @@ def maybe_create_shared_state_manager(feature_columns): return None +def _raise_shared_embedding_column_error(): + raise ValueError('SharedEmbeddingColumns are not supported in ' + '`linear_model` or `input_layer`. Please use ' + '`FeatureLayer` or `LinearModel` instead.') + + class SharedEmbeddingColumn( - DenseColumn, SequenceDenseColumn, + DenseColumn, + SequenceDenseColumn, + fc_old._DenseColumn, # pylint: disable=protected-access + fc_old._SequenceDenseColumn, # pylint: disable=protected-access collections.namedtuple( 'SharedEmbeddingColumn', ('categorical_column', 'dimension', 'combiner', 'initializer', @@ -2642,6 +2833,10 @@ class SharedEmbeddingColumn( 'tensor_name_in_ckpt', 'max_norm', 'trainable'))): """See `embedding_column`.""" + @property + def _is_v2_column(self): + return True + @property def name(self): """See `FeatureColumn` base class.""" @@ -2662,15 +2857,26 @@ class SharedEmbeddingColumn( """See `FeatureColumn` base class.""" return self.categorical_column.parse_example_spec + @property + def _parse_example_spec(self): + return _raise_shared_embedding_column_error() + def transform_feature(self, transformation_cache, state_manager): """See `FeatureColumn` base class.""" return transformation_cache.get(self.categorical_column, state_manager) + def _transform_feature(self, inputs): + return 
_raise_shared_embedding_column_error() + @property def variable_shape(self): """See `DenseColumn` base class.""" return tensor_shape.vector(self.dimension) + @property + def _variable_shape(self): + return _raise_shared_embedding_column_error() + def create_state(self, state_manager): """Creates the shared embedding lookup variable.""" if not isinstance(state_manager, SharedEmbeddingStateManager): @@ -2731,6 +2937,9 @@ class SharedEmbeddingColumn( self.categorical_column)) return self._get_dense_tensor_internal(transformation_cache, state_manager) + def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): + return _raise_shared_embedding_column_error() + def get_sequence_dense_tensor(self, transformation_cache, state_manager): """See `SequenceDenseColumn` base class.""" if not isinstance(self.categorical_column, SequenceCategoricalColumn): @@ -2745,11 +2954,17 @@ class SharedEmbeddingColumn( state_manager) sparse_tensors = self.categorical_column.get_sparse_tensors( transformation_cache, state_manager) - sequence_length = _sequence_length_from_sparse_tensor( + sequence_length = fc_old._sequence_length_from_sparse_tensor( # pylint: disable=protected-access sparse_tensors.id_tensor) return SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=dense_tensor, sequence_length=sequence_length) + def _get_sequence_dense_tensor(self, + inputs, + weight_collections=None, + trainable=None): + return _raise_shared_embedding_column_error() + def _create_tuple(shape, value): """Returns a tuple with given shape and filled with value.""" @@ -2858,10 +3073,15 @@ def _check_default_value(shape, default_value, dtype, key): class HashedCategoricalColumn( CategoricalColumn, + fc_old._CategoricalColumn, # pylint: disable=protected-access collections.namedtuple('HashedCategoricalColumn', ('key', 'hash_bucket_size', 'dtype'))): """see `categorical_column_with_hash_bucket`.""" + @property + def _is_v2_column(self): + return True + @property def name(self): """See 
`FeatureColumn` base class.""" @@ -2872,10 +3092,14 @@ class HashedCategoricalColumn( """See `FeatureColumn` base class.""" return {self.key: parsing_ops.VarLenFeature(self.dtype)} - def transform_feature(self, transformation_cache, state_manager): + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.parse_example_spec + + def _transform_input_tensor(self, input_tensor): """Hashes the values in the feature_column.""" - input_tensor = _to_sparse_input_and_drop_ignore_values( - transformation_cache.get(self.key, state_manager)) if not isinstance(input_tensor, sparse_tensor_lib.SparseTensor): raise ValueError('SparseColumn input must be a SparseTensor.') @@ -2899,24 +3123,55 @@ class HashedCategoricalColumn( return sparse_tensor_lib.SparseTensor( input_tensor.indices, sparse_id_values, input_tensor.dense_shape) + def transform_feature(self, transformation_cache, state_manager): + """Hashes the values in the feature_column.""" + input_tensor = _to_sparse_input_and_drop_ignore_values( + transformation_cache.get(self.key, state_manager)) + return self._transform_input_tensor(input_tensor) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key)) + return self._transform_input_tensor(input_tensor) + @property def num_buckets(self): """Returns number of buckets in this sparse feature.""" return self.hash_bucket_size + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _num_buckets(self): + return self.num_buckets + def get_sparse_tensors(self, transformation_cache, state_manager): """See `CategoricalColumn` base class.""" return CategoricalColumn.IdWeightPair( transformation_cache.get(self, state_manager), None) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, 
+ _FEATURE_COLUMN_DEPRECATION) + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + del weight_collections + del trainable + return CategoricalColumn.IdWeightPair(inputs.get(self), None) + class VocabularyFileCategoricalColumn( CategoricalColumn, + fc_old._CategoricalColumn, # pylint: disable=protected-access collections.namedtuple('VocabularyFileCategoricalColumn', ('key', 'vocabulary_file', 'vocabulary_size', 'num_oov_buckets', 'dtype', 'default_value'))): """See `categorical_column_with_vocabulary_file`.""" + @property + def _is_v2_column(self): + return True + @property def name(self): """See `FeatureColumn` base class.""" @@ -2927,11 +3182,14 @@ class VocabularyFileCategoricalColumn( """See `FeatureColumn` base class.""" return {self.key: parsing_ops.VarLenFeature(self.dtype)} - def transform_feature(self, transformation_cache, state_manager): - """Creates a lookup table for the vocabulary.""" - input_tensor = _to_sparse_input_and_drop_ignore_values( - transformation_cache.get(self.key, state_manager)) + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.parse_example_spec + def _transform_input_tensor(self, input_tensor): + """Creates a lookup table for the vocabulary.""" if self.dtype.is_integer != input_tensor.dtype.is_integer: raise ValueError( 'Column dtype and SparseTensors dtype must be compatible. 
' @@ -2957,25 +3215,56 @@ class VocabularyFileCategoricalColumn( key_dtype=key_dtype, name='{}_lookup'.format(self.key)).lookup(input_tensor) + def transform_feature(self, transformation_cache, state_manager): + """Creates a lookup table for the vocabulary.""" + input_tensor = _to_sparse_input_and_drop_ignore_values( + transformation_cache.get(self.key, state_manager)) + return self._transform_input_tensor(input_tensor) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key)) + return self._transform_input_tensor(input_tensor) + @property def num_buckets(self): """Returns number of buckets in this sparse feature.""" return self.vocabulary_size + self.num_oov_buckets + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _num_buckets(self): + return self.num_buckets + def get_sparse_tensors(self, transformation_cache, state_manager): """See `CategoricalColumn` base class.""" return CategoricalColumn.IdWeightPair( transformation_cache.get(self, state_manager), None) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + del weight_collections + del trainable + return CategoricalColumn.IdWeightPair(inputs.get(self), None) + class VocabularyListCategoricalColumn( CategoricalColumn, + fc_old._CategoricalColumn, # pylint: disable=protected-access collections.namedtuple( 'VocabularyListCategoricalColumn', ('key', 'vocabulary_list', 'dtype', 'default_value', 'num_oov_buckets')) ): """See `categorical_column_with_vocabulary_list`.""" + @property + def _is_v2_column(self): + return True + @property def name(self): """See `FeatureColumn` base class.""" @@ -2986,11 +3275,14 @@ class VocabularyListCategoricalColumn( """See `FeatureColumn` base class.""" 
return {self.key: parsing_ops.VarLenFeature(self.dtype)} - def transform_feature(self, transformation_cache, state_manager): - """Creates a lookup table for the vocabulary list.""" - input_tensor = _to_sparse_input_and_drop_ignore_values( - transformation_cache.get(self.key, state_manager)) + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.parse_example_spec + def _transform_input_tensor(self, input_tensor): + """Creates a lookup table for the vocabulary list.""" if self.dtype.is_integer != input_tensor.dtype.is_integer: raise ValueError( 'Column dtype and SparseTensors dtype must be compatible. ' @@ -3015,24 +3307,55 @@ class VocabularyListCategoricalColumn( dtype=key_dtype, name='{}_lookup'.format(self.key)).lookup(input_tensor) + def transform_feature(self, transformation_cache, state_manager): + """Creates a lookup table for the vocabulary list.""" + input_tensor = _to_sparse_input_and_drop_ignore_values( + transformation_cache.get(self.key, state_manager)) + return self._transform_input_tensor(input_tensor) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key)) + return self._transform_input_tensor(input_tensor) + @property def num_buckets(self): """Returns number of buckets in this sparse feature.""" return len(self.vocabulary_list) + self.num_oov_buckets + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _num_buckets(self): + return self.num_buckets + def get_sparse_tensors(self, transformation_cache, state_manager): """See `CategoricalColumn` base class.""" return CategoricalColumn.IdWeightPair( transformation_cache.get(self, state_manager), None) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def 
_get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + del weight_collections + del trainable + return CategoricalColumn.IdWeightPair(inputs.get(self), None) + class IdentityCategoricalColumn( CategoricalColumn, + fc_old._CategoricalColumn, # pylint: disable=protected-access collections.namedtuple('IdentityCategoricalColumn', ('key', 'number_buckets', 'default_value'))): """See `categorical_column_with_identity`.""" + @property + def _is_v2_column(self): + return True + @property def name(self): """See `FeatureColumn` base class.""" @@ -3043,11 +3366,14 @@ class IdentityCategoricalColumn( """See `FeatureColumn` base class.""" return {self.key: parsing_ops.VarLenFeature(dtypes.int64)} - def transform_feature(self, transformation_cache, state_manager): - """Returns a SparseTensor with identity values.""" - input_tensor = _to_sparse_input_and_drop_ignore_values( - transformation_cache.get(self.key, state_manager)) + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.parse_example_spec + def _transform_input_tensor(self, input_tensor): + """Returns a SparseTensor with identity values.""" if not input_tensor.dtype.is_integer: raise ValueError( 'Invalid input, not integer. 
key: {} dtype: {}'.format( @@ -3082,24 +3408,56 @@ class IdentityCategoricalColumn( values=values, dense_shape=input_tensor.dense_shape) + def transform_feature(self, transformation_cache, state_manager): + """Returns a SparseTensor with identity values.""" + input_tensor = _to_sparse_input_and_drop_ignore_values( + transformation_cache.get(self.key, state_manager)) + return self._transform_input_tensor(input_tensor) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key)) + return self._transform_input_tensor(input_tensor) + @property def num_buckets(self): """Returns number of buckets in this sparse feature.""" return self.number_buckets + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _num_buckets(self): + return self.num_buckets + def get_sparse_tensors(self, transformation_cache, state_manager): """See `CategoricalColumn` base class.""" return CategoricalColumn.IdWeightPair( transformation_cache.get(self, state_manager), None) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + del weight_collections + del trainable + return CategoricalColumn.IdWeightPair(inputs.get(self), None) + class WeightedCategoricalColumn( CategoricalColumn, + fc_old._CategoricalColumn, # pylint: disable=protected-access collections.namedtuple( 'WeightedCategoricalColumn', ('categorical_column', 'weight_feature_key', 'dtype'))): """See `weighted_categorical_column`.""" + @property + def _is_v2_column(self): + return (isinstance(self.categorical_column, FeatureColumn) and + self.categorical_column._is_v2_column) # pylint: disable=protected-access + @property def name(self): """See `FeatureColumn` base class.""" @@ -3116,15 +3474,29 @@ class 
WeightedCategoricalColumn( config[self.weight_feature_key] = parsing_ops.VarLenFeature(self.dtype) return config + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + config = self.categorical_column._parse_example_spec # pylint: disable=protected-access + if self.weight_feature_key in config: + raise ValueError('Parse config {} already exists for {}.'.format( + config[self.weight_feature_key], self.weight_feature_key)) + config[self.weight_feature_key] = parsing_ops.VarLenFeature(self.dtype) + return config + @property def num_buckets(self): """See `DenseColumn` base class.""" return self.categorical_column.num_buckets - def transform_feature(self, transformation_cache, state_manager): - """Applies weights to tensor generated from `categorical_column`'.""" - weight_tensor = transformation_cache.get(self.weight_feature_key, - state_manager) + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _num_buckets(self): + return self.categorical_column._num_buckets # pylint: disable=protected-access + + def _transform_weight_tensor(self, weight_tensor): if weight_tensor is None: raise ValueError('Missing weights {}.'.format(self.weight_feature_key)) weight_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( @@ -3138,27 +3510,63 @@ class WeightedCategoricalColumn( weight_tensor, ignore_value=0.0) if not weight_tensor.dtype.is_floating: weight_tensor = math_ops.to_float(weight_tensor) + return weight_tensor + + def transform_feature(self, transformation_cache, state_manager): + """Applies weights to tensor generated from `categorical_column`'.""" + weight_tensor = transformation_cache.get(self.weight_feature_key, + state_manager) + weight_tensor = self._transform_weight_tensor(weight_tensor) return (transformation_cache.get(self.categorical_column, state_manager), weight_tensor) + 
@deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + """Applies weights to tensor generated from `categorical_column`'.""" + weight_tensor = inputs.get(self.weight_feature_key) + weight_tensor = self._transform_weight_tensor(weight_tensor) + return (inputs.get(self.categorical_column), weight_tensor) + def get_sparse_tensors(self, transformation_cache, state_manager): """See `CategoricalColumn` base class.""" tensors = transformation_cache.get(self, state_manager) return CategoricalColumn.IdWeightPair(tensors[0], tensors[1]) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + del weight_collections + del trainable + tensors = inputs.get(self) + return CategoricalColumn.IdWeightPair(tensors[0], tensors[1]) + class CrossedColumn( CategoricalColumn, + fc_old._CategoricalColumn, # pylint: disable=protected-access collections.namedtuple('CrossedColumn', ('keys', 'hash_bucket_size', 'hash_key'))): """See `crossed_column`.""" + @property + def _is_v2_column(self): + for key in _collect_leaf_level_keys(self): + if isinstance(key, six.string_types): + continue + if not isinstance(key, FeatureColumn): + return False + if not key._is_v2_column: # pylint: disable=protected-access + return False + return True + @property def name(self): """See `FeatureColumn` base class.""" feature_names = [] for key in _collect_leaf_level_keys(self): - if isinstance(key, FeatureColumn): + if isinstance(key, (FeatureColumn, fc_old._FeatureColumn)): # pylint: disable=protected-access feature_names.append(key.name) else: # key must be a string feature_names.append(key) @@ -3171,17 +3579,25 @@ class CrossedColumn( for key in self.keys: if isinstance(key, FeatureColumn): config.update(key.parse_example_spec) + elif isinstance(key, fc_old._FeatureColumn): # pylint: disable=protected-access + 
config.update(key._parse_example_spec) # pylint: disable=protected-access else: # key must be a string config.update({key: parsing_ops.VarLenFeature(dtypes.string)}) return config + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.parse_example_spec + def transform_feature(self, transformation_cache, state_manager): """Generates a hashed sparse cross from the input tensors.""" feature_tensors = [] for key in _collect_leaf_level_keys(self): if isinstance(key, six.string_types): feature_tensors.append(transformation_cache.get(key, state_manager)) - elif isinstance(key, CategoricalColumn): + elif isinstance(key, (fc_old._CategoricalColumn, CategoricalColumn)): # pylint: disable=protected-access ids_and_weights = key.get_sparse_tensors(transformation_cache, state_manager) if ids_and_weights.weight_tensor is not None: @@ -3197,16 +3613,54 @@ class CrossedColumn( num_buckets=self.hash_bucket_size, hash_key=self.hash_key) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + """Generates a hashed sparse cross from the input tensors.""" + feature_tensors = [] + for key in _collect_leaf_level_keys(self): + if isinstance(key, six.string_types): + feature_tensors.append(inputs.get(key)) + elif isinstance(key, (CategoricalColumn, fc_old._CategoricalColumn)): # pylint: disable=protected-access + ids_and_weights = key._get_sparse_tensors(inputs) # pylint: disable=protected-access + if ids_and_weights.weight_tensor is not None: + raise ValueError( + 'crossed_column does not support weight_tensor, but the given ' + 'column populates weight_tensor. ' + 'Given column: {}'.format(key.name)) + feature_tensors.append(ids_and_weights.id_tensor) + else: + raise ValueError('Unsupported column type. 
Given: {}'.format(key)) + return sparse_ops.sparse_cross_hashed( + inputs=feature_tensors, + num_buckets=self.hash_bucket_size, + hash_key=self.hash_key) + @property def num_buckets(self): """Returns number of buckets in this sparse feature.""" return self.hash_bucket_size + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _num_buckets(self): + return self.num_buckets + def get_sparse_tensors(self, transformation_cache, state_manager): """See `CategoricalColumn` base class.""" return CategoricalColumn.IdWeightPair( transformation_cache.get(self, state_manager), None) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + """See `CategoricalColumn` base class.""" + del weight_collections + del trainable + return CategoricalColumn.IdWeightPair(inputs.get(self), None) + def _collect_leaf_level_keys(cross): """Collects base keys by expanding all nested crosses. @@ -3382,9 +3836,12 @@ def _prune_invalid_weights(sparse_ids, sparse_weights): return sparse_ids, sparse_weights -class IndicatorColumn(DenseColumn, SequenceDenseColumn, - collections.namedtuple('IndicatorColumn', - ('categorical_column'))): +class IndicatorColumn( + DenseColumn, + SequenceDenseColumn, + fc_old._DenseColumn, # pylint: disable=protected-access + fc_old._SequenceDenseColumn, # pylint: disable=protected-access + collections.namedtuple('IndicatorColumn', ('categorical_column'))): """Represents a one-hot column for use in deep networks. Args: @@ -3392,28 +3849,17 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn, `categorical_column_with_*` function. 
""" + @property + def _is_v2_column(self): + return (isinstance(self.categorical_column, FeatureColumn) and + self.categorical_column._is_v2_column) # pylint: disable=protected-access + @property def name(self): """See `FeatureColumn` base class.""" return '{}_indicator'.format(self.categorical_column.name) - def transform_feature(self, transformation_cache, state_manager): - """Returns dense `Tensor` representing feature. - - Args: - transformation_cache: A `FeatureTransformationCache` object to access - features. - state_manager: A `StateManager` to create / access resources such as - lookup tables. - - Returns: - Transformed feature `Tensor`. - - Raises: - ValueError: if input rank is not known at graph building time. - """ - id_weight_pair = self.categorical_column.get_sparse_tensors( - transformation_cache, state_manager) + def _transform_id_weight_pair(self, id_weight_pair): id_tensor = id_weight_pair.id_tensor weight_tensor = id_weight_pair.weight_tensor @@ -3422,7 +3868,7 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn, weighted_column = sparse_ops.sparse_merge( sp_ids=id_tensor, sp_values=weight_tensor, - vocab_size=int(self.variable_shape[-1])) + vocab_size=int(self._variable_shape[-1])) # Remove (?, -1) index weighted_column = sparse_ops.sparse_slice(weighted_column, [0, 0], weighted_column.dense_shape) @@ -3435,22 +3881,62 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn, # input_layer are float32. one_hot_id_tensor = array_ops.one_hot( dense_id_tensor, - depth=self.variable_shape[-1], + depth=self._variable_shape[-1], on_value=1.0, off_value=0.0) # Reduce to get a multi-hot per example. return math_ops.reduce_sum(one_hot_id_tensor, axis=[-2]) + def transform_feature(self, transformation_cache, state_manager): + """Returns dense `Tensor` representing feature. + + Args: + transformation_cache: A `FeatureTransformationCache` object to access + features. 
+ state_manager: A `StateManager` to create / access resources such as + lookup tables. + + Returns: + Transformed feature `Tensor`. + + Raises: + ValueError: if input rank is not known at graph building time. + """ + id_weight_pair = self.categorical_column.get_sparse_tensors( + transformation_cache, state_manager) + return self._transform_id_weight_pair(id_weight_pair) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + id_weight_pair = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access + return self._transform_id_weight_pair(id_weight_pair) + @property def parse_example_spec(self): """See `FeatureColumn` base class.""" return self.categorical_column.parse_example_spec + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.categorical_column._parse_example_spec # pylint: disable=protected-access + @property def variable_shape(self): """Returns a `TensorShape` representing the shape of the dense `Tensor`.""" - return tensor_shape.TensorShape([1, self.categorical_column.num_buckets]) + if isinstance(self.categorical_column, FeatureColumn): + return tensor_shape.TensorShape([1, self.categorical_column.num_buckets]) + else: + return tensor_shape.TensorShape([1, self.categorical_column._num_buckets]) # pylint: disable=protected-access + + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _variable_shape(self): + return tensor_shape.TensorShape([1, self.categorical_column._num_buckets]) # pylint: disable=protected-access def get_dense_tensor(self, transformation_cache, state_manager): """Returns dense `Tensor` representing feature. @@ -3481,6 +3967,27 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn, # representation created by transform_feature. 
return transformation_cache.get(self, state_manager) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): + del weight_collections + del trainable + if isinstance( + self.categorical_column, + (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)): # pylint: disable=protected-access + raise ValueError( + 'In indicator_column: {}. ' + 'categorical_column must not be of type _SequenceCategoricalColumn. ' + 'Suggested fix A: If you wish to use input_layer, use a ' + 'non-sequence categorical_column_with_*. ' + 'Suggested fix B: If you wish to create sequence input, use ' + 'sequence_input_layer instead of input_layer. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) + # Feature has been already transformed. Return the intermediate + # representation created by transform_feature. + return inputs.get(self) + def get_sequence_dense_tensor(self, transformation_cache, state_manager): """See `SequenceDenseColumn` base class.""" if not isinstance(self.categorical_column, SequenceCategoricalColumn): @@ -3496,7 +4003,36 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn, dense_tensor = transformation_cache.get(self, state_manager) sparse_tensors = self.categorical_column.get_sparse_tensors( transformation_cache, state_manager) - sequence_length = _sequence_length_from_sparse_tensor( + sequence_length = fc_old._sequence_length_from_sparse_tensor( # pylint: disable=protected-access + sparse_tensors.id_tensor) + return SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_sequence_dense_tensor(self, + inputs, + weight_collections=None, + trainable=None): + # Do nothing with weight_collections and trainable since no variables are + # created 
in this function. + del weight_collections + del trainable + if not isinstance( + self.categorical_column, + (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)): # pylint: disable=protected-access + raise ValueError( + 'In indicator_column: {}. ' + 'categorical_column must be of type _SequenceCategoricalColumn ' + 'to use sequence_input_layer. ' + 'Suggested fix: Use one of sequence_categorical_column_with_*. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) + # Feature has been already transformed. Return the intermediate + # representation created by _transform_feature. + dense_tensor = inputs.get(self) + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access + sequence_length = fc_old._sequence_length_from_sparse_tensor( # pylint: disable=protected-access sparse_tensors.id_tensor) return SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=dense_tensor, sequence_length=sequence_length) @@ -3518,27 +4054,18 @@ def _verify_static_batch_size_equality(tensors, columns): expected_batch_size, tensors[i].shape[0])) -def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): - """Returns a [batch_size] Tensor with per-example sequence length.""" - with ops.name_scope(None, 'sequence_length') as name_scope: - row_ids = sp_tensor.indices[:, 0] - column_ids = sp_tensor.indices[:, 1] - column_ids += array_ops.ones_like(column_ids) - seq_length = math_ops.to_int64( - math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) - # If the last n rows do not have ids, seq_length will have shape - # [batch_size - n]. Pad the remaining values with zeros. 
- n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] - padding = array_ops.zeros(n_pad, dtype=seq_length.dtype) - return array_ops.concat([seq_length, padding], axis=0, name=name_scope) - - -class SequenceCategoricalColumn(FeatureColumn, - collections.namedtuple( - 'SequenceCategoricalColumn', - ('categorical_column'))): +class SequenceCategoricalColumn( + FeatureColumn, + fc_old._CategoricalColumn, # pylint: disable=protected-access + collections.namedtuple('SequenceCategoricalColumn', + ('categorical_column'))): """Represents sequences of categorical data.""" + @property + def _is_v2_column(self): + return (isinstance(self.categorical_column, FeatureColumn) and + self.categorical_column._is_v2_column) # pylint: disable=protected-access + @property def name(self): """See `FeatureColumn` base class.""" @@ -3549,16 +4076,46 @@ class SequenceCategoricalColumn(FeatureColumn, """See `FeatureColumn` base class.""" return self.categorical_column.parse_example_spec + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.categorical_column._parse_example_spec # pylint: disable=protected-access + def transform_feature(self, transformation_cache, state_manager): """See `FeatureColumn` base class.""" return self.categorical_column.transform_feature(transformation_cache, state_manager) + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + return self.categorical_column._transform_feature(inputs) # pylint: disable=protected-access + @property def num_buckets(self): """Returns number of buckets in this sparse feature.""" return self.categorical_column.num_buckets + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _num_buckets(self): + return self.categorical_column._num_buckets # pylint: disable=protected-access + + def 
_get_sparse_tensors_helper(self, sparse_tensors): + id_tensor = sparse_tensors.id_tensor + weight_tensor = sparse_tensors.weight_tensor + # Expands third dimension, if necessary so that embeddings are not + # combined during embedding lookup. If the tensor is already 3D, leave + # as-is. + shape = array_ops.shape(id_tensor) + target_shape = [shape[0], shape[1], -1] + id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape) + if weight_tensor is not None: + weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape) + return CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) + def get_sequence_sparse_tensors(self, transformation_cache, state_manager): """Returns an IdWeightPair. @@ -3580,27 +4137,11 @@ class SequenceCategoricalColumn(FeatureColumn, """ sparse_tensors = self.categorical_column.get_sparse_tensors( transformation_cache, state_manager) - id_tensor = sparse_tensors.id_tensor - weight_tensor = sparse_tensors.weight_tensor - # Expands final dimension, so that embeddings are not combined during - # embedding lookup. - check_id_rank = check_ops.assert_equal( - array_ops.rank(id_tensor), 2, - data=[ - 'Column {} expected ID tensor of rank 2. 
'.format(self.name), - 'id_tensor shape: ', array_ops.shape(id_tensor)]) - with ops.control_dependencies([check_id_rank]): - id_tensor = sparse_ops.sparse_reshape( - id_tensor, - shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) - if weight_tensor is not None: - check_weight_rank = check_ops.assert_equal( - array_ops.rank(weight_tensor), 2, - data=[ - 'Column {} expected weight tensor of rank 2.'.format(self.name), - 'weight_tensor shape:', array_ops.shape(weight_tensor)]) - with ops.control_dependencies([check_weight_rank]): - weight_tensor = sparse_ops.sparse_reshape( - weight_tensor, - shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) - return CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) + return self._get_sparse_tensors_helper(sparse_tensors) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access + return self._get_sparse_tensors_helper(sparse_tensors) diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index d3787146ed..31bc0485ef 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -31,12 +31,8 @@ from tensorflow.python.client import session from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.feature_column import feature_column as fc_old from tensorflow.python.feature_column import feature_column_v2 as fc -from tensorflow.python.feature_column.feature_column_v2 import _transform_features -from tensorflow.python.feature_column.feature_column_v2 import FeatureColumn -from 
tensorflow.python.feature_column.feature_column_v2 import FeatureLayer -from tensorflow.python.feature_column.feature_column_v2 import FeatureTransformationCache -from tensorflow.python.feature_column.feature_column_v2 import StateManager from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -46,6 +42,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test @@ -60,15 +57,29 @@ def _initialized_session(config=None): return sess +def get_linear_model_bias(name='linear_model'): + with variable_scope.variable_scope(name, reuse=True): + return variable_scope.get_variable('bias_weights') + + +def get_linear_model_column_var(column, name='linear_model'): + return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES, + name + '/' + column.name)[0] + + class LazyColumnTest(test.TestCase): def test_transformations_called_once(self): - class TransformCounter(FeatureColumn): + class TransformCounter(fc.FeatureColumn): def __init__(self): self.num_transform = 0 + @property + def _is_v2_column(self): + return True + @property def name(self): return 'TransformCounter' @@ -81,7 +92,7 @@ class LazyColumnTest(test.TestCase): def parse_example_spec(self): pass - transformation_cache = FeatureTransformationCache( + transformation_cache = fc.FeatureTransformationCache( features={'a': [[2], [3.]]}) column = TransformCounter() self.assertEqual(0, column.num_transform) @@ -92,7 +103,11 @@ class LazyColumnTest(test.TestCase): def test_returns_transform_output(self): - class Transformer(FeatureColumn): + class Transformer(fc.FeatureColumn): + + @property + def 
_is_v2_column(self): + return True @property def name(self): @@ -105,7 +120,7 @@ class LazyColumnTest(test.TestCase): def parse_example_spec(self): pass - transformation_cache = FeatureTransformationCache( + transformation_cache = fc.FeatureTransformationCache( features={'a': [[2], [3.]]}) column = Transformer() self.assertEqual('Output', transformation_cache.get(column, None)) @@ -113,7 +128,11 @@ class LazyColumnTest(test.TestCase): def test_does_not_pollute_given_features_dict(self): - class Transformer(FeatureColumn): + class Transformer(fc.FeatureColumn): + + @property + def _is_v2_column(self): + return True @property def name(self): @@ -127,12 +146,12 @@ class LazyColumnTest(test.TestCase): pass features = {'a': [[2], [3.]]} - transformation_cache = FeatureTransformationCache(features=features) + transformation_cache = fc.FeatureTransformationCache(features=features) transformation_cache.get(Transformer(), None) self.assertEqual(['a'], list(features.keys())) def test_error_if_feature_is_not_found(self): - transformation_cache = FeatureTransformationCache( + transformation_cache = fc.FeatureTransformationCache( features={'a': [[2], [3.]]}) with self.assertRaisesRegexp(ValueError, 'bbb is not in features dictionary'): @@ -143,7 +162,11 @@ class LazyColumnTest(test.TestCase): def test_not_supported_feature_column(self): - class NotAProperColumn(FeatureColumn): + class NotAProperColumn(fc.FeatureColumn): + + @property + def _is_v2_column(self): + return True @property def name(self): @@ -157,7 +180,7 @@ class LazyColumnTest(test.TestCase): def parse_example_spec(self): pass - transformation_cache = FeatureTransformationCache( + transformation_cache = fc.FeatureTransformationCache( features={'a': [[2], [3.]]}) with self.assertRaisesRegexp(ValueError, 'NotAProperColumn is not supported'): @@ -168,7 +191,7 @@ class LazyColumnTest(test.TestCase): class NotAFeatureColumn(object): pass - transformation_cache = FeatureTransformationCache( + transformation_cache = 
fc.FeatureTransformationCache( features={'a': [[2], [3.]]}) with self.assertRaisesRegexp( TypeError, '"key" must be either a "str" or "FeatureColumn".'): @@ -176,7 +199,7 @@ class LazyColumnTest(test.TestCase): def test_expand_dim_rank_1_sparse_tensor_empty_batch(self): # empty 1-D sparse tensor: - transformation_cache = FeatureTransformationCache( + transformation_cache = fc.FeatureTransformationCache( features={ 'a': sparse_tensor.SparseTensor( @@ -201,6 +224,7 @@ class NumericColumnTest(test.TestCase): self.assertIsNone(a.default_value) self.assertEqual(dtypes.float32, a.dtype) self.assertIsNone(a.normalizer_fn) + self.assertTrue(a._is_v2_column) def test_key_should_be_string(self): with self.assertRaisesRegexp(ValueError, 'key must be a string.'): @@ -317,7 +341,9 @@ class NumericColumnTest(test.TestCase): return input_tensor + 2. price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two) - output = _transform_features({'price': [[1., 2.], [5., 6.]]}, [price], None) + output = fc._transform_features({ + 'price': [[1., 2.], [5., 6.]] + }, [price], None) with self.cached_session(): self.assertAllEqual([[3., 4.], [7., 8.]], output[price].eval()) @@ -327,7 +353,7 @@ class NumericColumnTest(test.TestCase): return input_tensor + 2. 
price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'price': [[1., 2.], [5., 6.]] }) self.assertEqual( @@ -336,7 +362,7 @@ class NumericColumnTest(test.TestCase): def test_sparse_tensor_not_supported(self): price = fc.numeric_column('price') - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'price': sparse_tensor.SparseTensor( indices=[[0, 0]], values=[0.3], dense_shape=[1, 1]) @@ -370,6 +396,20 @@ class NumericColumnTest(test.TestCase): sess.run(price_var.assign([[10.]])) self.assertAllClose([[10.], [50.]], predictions.eval()) + def test_old_linear_model(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + predictions = fc_old.linear_model(features, [price]) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + self.assertAllClose([[0.]], price_var.eval()) + self.assertAllClose([[0.], [0.]], predictions.eval()) + sess.run(price_var.assign([[10.]])) + self.assertAllClose([[10.], [50.]], predictions.eval()) + class BucketizedColumnTest(test.TestCase): @@ -404,6 +444,13 @@ class BucketizedColumnTest(test.TestCase): def test_name(self): a = fc.numeric_column('aaa', dtype=dtypes.int32) b = fc.bucketized_column(a, boundaries=[0, 1]) + self.assertTrue(b._is_v2_column) + self.assertEqual('aaa_bucketized', b.name) + + def test_is_v2_column_old_numeric(self): + a = fc_old.numeric_column('aaa', dtype=dtypes.int32) + b = fc.bucketized_column(a, boundaries=[0, 1]) + self.assertFalse(b._is_v2_column) self.assertEqual('aaa_bucketized', b.name) def test_parse_spec(self): @@ -445,7 +492,7 @@ class BucketizedColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[2]) bucketized_price = 
fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): - transformed_tensor = _transform_features({ + transformed_tensor = fc._transform_features({ 'price': [[-1., 1.], [5., 6.]] }, [bucketized_price], None) with _initialized_session(): @@ -457,7 +504,7 @@ class BucketizedColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[1]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'price': [[-1.], [1.], [5.], [6.]] }) with _initialized_session(): @@ -476,7 +523,7 @@ class BucketizedColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[2]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'price': [[-1., 1.], [5., 6.]] }) with _initialized_session(): @@ -493,7 +540,7 @@ class BucketizedColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[1]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'price': [[-1.], [1.], [5.], [6.]] }) with _initialized_session() as sess: @@ -511,7 +558,7 @@ class BucketizedColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[2]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'price': [[-1., 1.], [5., 6.]] }) with _initialized_session() as sess: @@ -529,7 +576,7 @@ class BucketizedColumnTest(test.TestCase): def test_sparse_tensor_input_not_supported(self): price = fc.numeric_column('price') bucketized_price 
= fc.bucketized_column(price, boundaries=[0, 1]) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'price': sparse_tensor.SparseTensor( indices=[[0, 0]], values=[0.3], dense_shape=[1, 1]) @@ -599,6 +646,85 @@ class BucketizedColumnTest(test.TestCase): sess.run(bias.assign([1.])) self.assertAllClose([[81.], [141.]], predictions.eval()) + def test_old_linear_model_one_input_value(self): + """Tests linear_model() for input with shape=[1].""" + price = fc.numeric_column('price', shape=[1]) + bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) + with ops.Graph().as_default(): + features = {'price': [[-1.], [1.], [5.], [6.]]} + predictions = fc_old.linear_model(features, [bucketized_price]) + bias = get_linear_model_bias() + bucketized_price_var = get_linear_model_column_var(bucketized_price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + # One weight variable per bucket, all initialized to zero. + self.assertAllClose([[0.], [0.], [0.], [0.], [0.]], + bucketized_price_var.eval()) + self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval()) + sess.run( + bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]])) + # price -1. is in the 0th bucket, whose weight is 10. + # price 1. is in the 1st bucket, whose weight is 20. + # price 5. is in the 3rd bucket, whose weight is 40. + # price 6. is in the 4th bucket, whose weight is 50. 
+ self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval()) + sess.run(bias.assign([1.])) + self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval()) + + def test_old_linear_model_two_input_values(self): + """Tests linear_model() for input with shape=[2].""" + price = fc.numeric_column('price', shape=[2]) + bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) + with ops.Graph().as_default(): + features = {'price': [[-1., 1.], [5., 6.]]} + predictions = fc_old.linear_model(features, [bucketized_price]) + bias = get_linear_model_bias() + bucketized_price_var = get_linear_model_column_var(bucketized_price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + # One weight per bucket per input column, all initialized to zero. + self.assertAllClose( + [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]], + bucketized_price_var.eval()) + self.assertAllClose([[0.], [0.]], predictions.eval()) + sess.run( + bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.], + [60.], [70.], [80.], [90.], [100.]])) + # 1st example: + # price -1. is in the 0th bucket, whose weight is 10. + # price 1. is in the 6th bucket, whose weight is 70. + # 2nd example: + # price 5. is in the 3rd bucket, whose weight is 40. + # price 6. is in the 9th bucket, whose weight is 100. 
+ self.assertAllClose([[80.], [140.]], predictions.eval()) + sess.run(bias.assign([1.])) + self.assertAllClose([[81.], [141.]], predictions.eval()) + + def test_old_linear_model_one_input_value_old_numeric(self): + """Tests linear_model() for input with shape=[1].""" + price = fc_old.numeric_column('price', shape=[1]) + bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) + with ops.Graph().as_default(): + features = {'price': [[-1.], [1.], [5.], [6.]]} + predictions = fc_old.linear_model(features, [bucketized_price]) + bias = get_linear_model_bias() + bucketized_price_var = get_linear_model_column_var(bucketized_price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + # One weight variable per bucket, all initialized to zero. + self.assertAllClose([[0.], [0.], [0.], [0.], [0.]], + bucketized_price_var.eval()) + self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval()) + sess.run( + bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]])) + # price -1. is in the 0th bucket, whose weight is 10. + # price 1. is in the 1st bucket, whose weight is 20. + # price 5. is in the 3rd bucket, whose weight is 40. + # price 6. is in the 4th bucket, whose weight is 50. 
+ self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval()) + sess.run(bias.assign([1.])) + self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval()) + class HashedCategoricalColumnTest(test.TestCase): @@ -608,6 +734,7 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertEqual('aaa', a.key) self.assertEqual(10, a.hash_bucket_size) self.assertEqual(dtypes.string, a.dtype) + self.assertTrue(a._is_v2_column) def test_key_should_be_string(self): with self.assertRaisesRegexp(ValueError, 'key must be a string.'): @@ -675,7 +802,9 @@ class HashedCategoricalColumnTest(test.TestCase): values=['omar', 'stringer', 'marlo'], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) - outputs = _transform_features({'wire': wire_tensor}, [hashed_sparse], None) + outputs = fc._transform_features({ + 'wire': wire_tensor + }, [hashed_sparse], None) output = outputs[hashed_sparse] # Check exact hashed output. If hashing changes this test will break. expected_values = [6, 4, 1] @@ -705,7 +834,7 @@ class HashedCategoricalColumnTest(test.TestCase): values=[101.], indices=[[0, 0]], dense_shape=[1, 1]) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'a_int': int_tensor, 'a_string': string_tensor, 'a_float': float_tensor @@ -720,7 +849,7 @@ class HashedCategoricalColumnTest(test.TestCase): 'wire', 10, dtype=dtypes.int64) wire_tensor = sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - transformation_cache = FeatureTransformationCache({'wire': wire_tensor}) + transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor}) with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'): transformation_cache.get(hashed_sparse, None) @@ -731,7 +860,7 @@ class HashedCategoricalColumnTest(test.TestCase): values=[101, 201, 301], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) - transformation_cache = FeatureTransformationCache({'wire': 
wire_tensor}) + transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor}) output = transformation_cache.get(hashed_sparse, None) # Check exact hashed output. If hashing changes this test will break. expected_values = [3, 7, 5] @@ -745,7 +874,7 @@ class HashedCategoricalColumnTest(test.TestCase): values=constant_op.constant([101, 201, 301], dtype=dtypes.int32), indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) - transformation_cache = FeatureTransformationCache({'wire': wire_tensor}) + transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor}) output = transformation_cache.get(hashed_sparse, None) # Check exact hashed output. If hashing changes this test will break. expected_values = [3, 7, 5] @@ -754,7 +883,7 @@ class HashedCategoricalColumnTest(test.TestCase): def test_get_sparse_tensors(self): hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'wire': sparse_tensor.SparseTensor( values=['omar', 'stringer', 'marlo'], @@ -769,7 +898,7 @@ class HashedCategoricalColumnTest(test.TestCase): def test_get_sparse_tensors_dense_input(self): hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'wire': (('omar', ''), ('stringer', 'marlo')) }) id_weight_pair = hashed_sparse.get_sparse_tensors(transformation_cache, @@ -800,6 +929,28 @@ class HashedCategoricalColumnTest(test.TestCase): # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 self.assertAllClose(((4.,), (6.,)), predictions.eval()) + def test_old_linear_model(self): + wire_column = fc.categorical_column_with_hash_bucket('wire', 4) + self.assertEqual(4, wire_column.num_buckets) + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + wire_column.name: + sparse_tensor.SparseTensorValue( + indices=((0, 
0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + }, (wire_column,)) + bias = get_linear_model_bias() + wire_var = get_linear_model_column_var(wire_column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() + # 'marlo' -> 3: wire_var[3] = 4 + # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 + self.assertAllClose(((4.,), (6.,)), predictions.eval()) + class CrossedColumnTest(test.TestCase): @@ -841,8 +992,20 @@ class CrossedColumnTest(test.TestCase): a = fc.numeric_column('a', dtype=dtypes.int32) b = fc.bucketized_column(a, boundaries=[0, 1]) crossed1 = fc.crossed_column(['d1', 'd2'], 10) + self.assertTrue(crossed1._is_v2_column) + + crossed2 = fc.crossed_column([b, 'c', crossed1], 10) + self.assertTrue(crossed2._is_v2_column) + self.assertEqual('a_bucketized_X_c_X_d1_X_d2', crossed2.name) + + def test_is_v2_column(self): + a = fc_old.numeric_column('a', dtype=dtypes.int32) + b = fc.bucketized_column(a, boundaries=[0, 1]) + crossed1 = fc.crossed_column(['d1', 'd2'], 10) + self.assertTrue(crossed1._is_v2_column) crossed2 = fc.crossed_column([b, 'c', crossed1], 10) + self.assertFalse(crossed2._is_v2_column) self.assertEqual('a_bucketized_X_c_X_d1_X_d2', crossed2.name) def test_name_ordered_alphabetically(self): @@ -927,7 +1090,7 @@ class CrossedColumnTest(test.TestCase): indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]), } - outputs = _transform_features(features, [price_cross_wire], None) + outputs = fc._transform_features(features, [price_cross_wire], None) output = outputs[price_cross_wire] with self.cached_session() as sess: output_val = sess.run(output) @@ -943,7 +1106,7 @@ class CrossedColumnTest(test.TestCase): crossed1 = fc.crossed_column(['d1', 'd2'], 10) crossed2 = fc.crossed_column([b, 'c', 
crossed1], 15, hash_key=5) with ops.Graph().as_default(): - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'a': constant_op.constant(((-1., .5), (.5, 1.))), 'c': @@ -983,7 +1146,7 @@ class CrossedColumnTest(test.TestCase): b = fc.bucketized_column(a, boundaries=(0, 1)) crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5) with ops.Graph().as_default(): - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'a': constant_op.constant(((-1., .5), (.5, 1.))), 'c': @@ -1040,6 +1203,10 @@ class CrossedColumnTest(test.TestCase): class _TestColumnWithWeights(fc.CategoricalColumn): """Produces sparse IDs and sparse weights.""" + @property + def _is_v2_column(self): + return True + @property def name(self): return 'test_column' @@ -1092,6 +1259,146 @@ class CrossedColumnTest(test.TestCase): dense_shape=(2, 2)), }) + def test_old_linear_model(self): + """Tests linear_model. + + Uses data from test_get_sparse_tesnsors_simple. 
+ """ + a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,)) + b = fc.bucketized_column(a, boundaries=(0, 1)) + crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5) + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + 'a': + constant_op.constant(((-1., .5), (.5, 1.))), + 'c': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=['cA', 'cB', 'cC'], + dense_shape=(2, 2)), + }, (crossed,)) + bias = get_linear_model_bias() + crossed_var = get_linear_model_column_var(crossed) + with _initialized_session() as sess: + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)), + crossed_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) + # Expected ids after cross = (1, 0, 1, 3, 4, 2) + self.assertAllClose(((3.,), (14.,)), predictions.eval()) + sess.run(bias.assign((.1,))) + self.assertAllClose(((3.1,), (14.1,)), predictions.eval()) + + def test_old_linear_model_with_weights(self): + + class _TestColumnWithWeights(fc.CategoricalColumn, + fc_old._CategoricalColumn): + """Produces sparse IDs and sparse weights.""" + + @property + def _is_v2_column(self): + return True + + @property + def name(self): + return 'test_column' + + @property + def parse_example_spec(self): + return { + self.name: + parsing_ops.VarLenFeature(dtypes.int32), + '{}_weights'.format(self.name): + parsing_ops.VarLenFeature(dtypes.float32), + } + + @property + def _parse_example_spec(self): + return self.parse_example_spec + + @property + def num_buckets(self): + return 5 + + @property + def _num_buckets(self): + return self.num_buckets + + def transform_feature(self, transformation_cache, state_manager): + raise ValueError('Should not be called.') + + def _transform_feature(self, inputs): + return (inputs.get(self.name), + inputs.get('{}_weights'.format(self.name))) + + def get_sparse_tensors(self, 
transformation_cache, state_manager): + raise ValueError('Should not be called.') + + def _get_sparse_tensors(self, + inputs, + weight_collections=None, + trainable=None): + """Populates both id_tensor and weight_tensor.""" + ids_and_weights = inputs.get(self) + return fc.CategoricalColumn.IdWeightPair( + id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1]) + + t = _TestColumnWithWeights() + crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) + with ops.Graph().as_default(): + with self.assertRaisesRegexp( + ValueError, + 'crossed_column does not support weight_tensor.*{}'.format(t.name)): + fc_old.linear_model({ + t.name: + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=[0, 1, 2], + dense_shape=(2, 2)), + '{}_weights'.format(t.name): + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=[1., 10., 2.], + dense_shape=(2, 2)), + 'c': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=['cA', 'cB', 'cC'], + dense_shape=(2, 2)), + }, (crossed,)) + + def test_old_linear_model_old_numeric(self): + """Tests linear_model. + + Uses data from test_get_sparse_tesnsors_simple. 
+ """ + a = fc_old.numeric_column('a', dtype=dtypes.int32, shape=(2,)) + b = fc.bucketized_column(a, boundaries=(0, 1)) + crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5) + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + 'a': + constant_op.constant(((-1., .5), (.5, 1.))), + 'c': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=['cA', 'cB', 'cC'], + dense_shape=(2, 2)), + }, (crossed,)) + bias = get_linear_model_bias() + crossed_var = get_linear_model_column_var(crossed) + with _initialized_session() as sess: + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)), + crossed_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) + # Expected ids after cross = (1, 0, 1, 3, 4, 2) + self.assertAllClose(((3.,), (14.,)), predictions.eval()) + sess.run(bias.assign((.1,))) + self.assertAllClose(((3.1,), (14.1,)), predictions.eval()) + class LinearModelTest(test.TestCase): @@ -1108,6 +1415,10 @@ class LinearModelTest(test.TestCase): class NotSupportedColumn(fc.FeatureColumn): + @property + def _is_v2_column(self): + return True + @property def name(self): return 'NotSupportedColumn' @@ -1189,6 +1500,10 @@ class LinearModelTest(test.TestCase): class _DenseAndSparseColumn(fc.DenseColumn, fc.CategoricalColumn): + @property + def _is_v2_column(self): + return True + @property def name(self): return 'dense_and_sparse_column' @@ -1735,60 +2050,1519 @@ class LinearModelTest(test.TestCase): self.assertAllClose([[25.], [105.]], predictions2.eval()) -class FeatureLayerTest(test.TestCase): - - @test_util.run_in_graph_and_eager_modes() - def test_retrieving_input(self): - features = {'a': [0.]} - feature_layer = FeatureLayer(fc.numeric_column('a')) - inputs = self.evaluate(feature_layer(features)) - self.assertAllClose([[0.]], inputs) +class OldLinearModelTest(test.TestCase): - def 
test_reuses_variables(self): - with context.eager_mode(): - sparse_input = sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (2, 0)), - values=(0, 1, 2), - dense_shape=(3, 3)) + def test_raises_if_empty_feature_columns(self): + with self.assertRaisesRegexp(ValueError, + 'feature_columns must not be empty'): + fc_old.linear_model(features={}, feature_columns=[]) - # Create feature columns (categorical and embedding). - categorical_column = fc.categorical_column_with_identity( - key='a', num_buckets=3) - embedding_dimension = 2 - def _embedding_column_initializer(shape, dtype, partition_info): - del shape # unused - del dtype # unused - del partition_info # unused - embedding_values = ( - (1, 0), # id 0 - (0, 1), # id 1 - (1, 1)) # id 2 - return embedding_values + def test_should_be_feature_column(self): + with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'): + fc_old.linear_model(features={'a': [[0]]}, feature_columns='NotSupported') - embedding_column = fc.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_embedding_column_initializer) + def test_should_be_dense_or_categorical_column(self): - feature_layer = FeatureLayer([embedding_column]) - features = {'a': sparse_input} + class NotSupportedColumn(fc.FeatureColumn, fc_old._FeatureColumn): - inputs = feature_layer(features) - variables = feature_layer.variables + @property + def _is_v2_column(self): + return True - # Sanity check: test that the inputs are correct. 
- self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs) + @property + def name(self): + return 'NotSupportedColumn' + + def transform_feature(self, transformation_cache, state_manager): + pass + + def _transform_feature(self, inputs): + pass + + @property + def parse_example_spec(self): + pass + + @property + def _parse_example_spec(self): + pass + + with self.assertRaisesRegexp( + ValueError, 'must be either a _DenseColumn or _CategoricalColumn'): + fc_old.linear_model( + features={'a': [[0]]}, feature_columns=[NotSupportedColumn()]) + + def test_does_not_support_dict_columns(self): + with self.assertRaisesRegexp( + ValueError, 'Expected feature_columns to be iterable, found dict.'): + fc_old.linear_model( + features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')}) + + def test_raises_if_duplicate_name(self): + with self.assertRaisesRegexp( + ValueError, 'Duplicate feature column name found for columns'): + fc_old.linear_model( + features={'a': [[0]]}, + feature_columns=[fc.numeric_column('a'), + fc.numeric_column('a')]) + + def test_dense_bias(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + predictions = fc_old.linear_model(features, [price]) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + sess.run(price_var.assign([[10.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[15.], [55.]], predictions.eval()) + + def test_sparse_bias(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {'wire_cast': wire_tensor} + predictions = fc_old.linear_model(features, [wire_cast]) + bias = get_linear_model_bias() + wire_cast_var = 
get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + self.assertAllClose([[0.], [0.], [0.], [0.]], wire_cast_var.eval()) + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[1005.], [10015.]], predictions.eval()) + + def test_dense_and_sparse_bias(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + price = fc.numeric_column('price') + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]} + predictions = fc_old.linear_model(features, [wire_cast, price]) + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + sess.run(price_var.assign([[10.]])) + self.assertAllClose([[1015.], [10065.]], predictions.eval()) + + def test_dense_and_sparse_column(self): + """When the column is both dense and sparse, uses sparse tensors.""" + + class _DenseAndSparseColumn(fc.DenseColumn, fc.CategoricalColumn, + fc_old._DenseColumn, fc_old._CategoricalColumn): + + @property + def _is_v2_column(self): + return True + + @property + def name(self): + return 'dense_and_sparse_column' + + @property + def parse_example_spec(self): + return {self.name: parsing_ops.VarLenFeature(self.dtype)} + + @property + def _parse_example_spec(self): + return self.parse_example_spec + + def transform_feature(self, transformation_cache, state_manager): + raise ValueError('Should not use this method.') + + def _transform_feature(self, inputs): + return inputs.get(self.name) + + @property + def variable_shape(self): + return 
self.variable_shape + + @property + def _variable_shape(self): + return self.variable_shape + + def get_dense_tensor(self, transformation_cache, state_manager): + raise ValueError('Should not use this method.') + + def _get_dense_tensor(self, inputs): + raise ValueError('Should not use this method.') + + @property + def num_buckets(self): + return 4 + + @property + def _num_buckets(self): + return self.num_buckets + + def get_sparse_tensors(self, transformation_cache, state_manager): + raise ValueError('Should not use this method.') + + def _get_sparse_tensors(self, + inputs, + weight_collections=None, + trainable=None): + sp_tensor = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0], [1, 1]], + values=[2, 0, 3], + dense_shape=[2, 2]) + return fc.CategoricalColumn.IdWeightPair(sp_tensor, None) + + dense_and_sparse_column = _DenseAndSparseColumn() + with ops.Graph().as_default(): + sp_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {dense_and_sparse_column.name: sp_tensor} + predictions = fc_old.linear_model(features, [dense_and_sparse_column]) + bias = get_linear_model_bias() + dense_and_sparse_column_var = get_linear_model_column_var( + dense_and_sparse_column) + with _initialized_session() as sess: + sess.run( + dense_and_sparse_column_var.assign([[10.], [100.], [1000.], + [10000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[1005.], [10015.]], predictions.eval()) + + def test_dense_multi_output(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + predictions = fc_old.linear_model(features, [price], units=3) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose(np.zeros((3,)), bias.eval()) + self.assertAllClose(np.zeros((1, 3)), price_var.eval()) + sess.run(price_var.assign([[10., 100., 1000.]])) + 
sess.run(bias.assign([5., 6., 7.])) + self.assertAllClose([[15., 106., 1007.], [55., 506., 5007.]], + predictions.eval()) + + def test_sparse_multi_output(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {'wire_cast': wire_tensor} + predictions = fc_old.linear_model(features, [wire_cast], units=3) + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + self.assertAllClose(np.zeros((3,)), bias.eval()) + self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval()) + sess.run( + wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], + [1000., 1100., 1200.], + [10000., 11000., 12000.]])) + sess.run(bias.assign([5., 6., 7.])) + self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]], + predictions.eval()) + + def test_dense_multi_dimension(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1., 2.], [5., 6.]]} + predictions = fc_old.linear_model(features, [price]) + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose([[0.], [0.]], price_var.eval()) + sess.run(price_var.assign([[10.], [100.]])) + self.assertAllClose([[210.], [650.]], predictions.eval()) + + def test_sparse_multi_rank(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): + wire_tensor = array_ops.sparse_placeholder(dtypes.string) + wire_value = sparse_tensor.SparseTensorValue( + values=['omar', 'stringer', 'marlo', 'omar'], # hashed = [2, 0, 3, 2] + indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]], + dense_shape=[2, 2, 2]) + features = {'wire_cast': wire_tensor} + predictions = fc_old.linear_model(features, 
[wire_cast]) + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval()) + self.assertAllClose( + np.zeros((2, 1)), + predictions.eval(feed_dict={wire_tensor: wire_value})) + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + self.assertAllClose( + [[1010.], [11000.]], + predictions.eval(feed_dict={wire_tensor: wire_value})) + + def test_sparse_combiner(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {'wire_cast': wire_tensor} + predictions = fc_old.linear_model( + features, [wire_cast], sparse_combiner='mean') + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[1005.], [5010.]], predictions.eval()) + + def test_sparse_combiner_with_negative_weights(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast_weights = fc.weighted_categorical_column(wire_cast, 'weights') + + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = { + 'wire_cast': wire_tensor, + 'weights': constant_op.constant([[1., 1., -1.0]]) + } + predictions = fc_old.linear_model( + features, [wire_cast_weights], sparse_combiner='sum') + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + 
self.assertAllClose([[1005.], [-9985.]], predictions.eval()) + + def test_dense_multi_dimension_multi_output(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1., 2.], [5., 6.]]} + predictions = fc_old.linear_model(features, [price], units=3) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose(np.zeros((3,)), bias.eval()) + self.assertAllClose(np.zeros((2, 3)), price_var.eval()) + sess.run(price_var.assign([[1., 2., 3.], [10., 100., 1000.]])) + sess.run(bias.assign([2., 3., 4.])) + self.assertAllClose([[23., 205., 2007.], [67., 613., 6019.]], + predictions.eval()) + + def test_raises_if_shape_mismatch(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + with self.assertRaisesRegexp( + Exception, + r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): + fc_old.linear_model(features, [price]) + + def test_dense_reshaping(self): + price = fc.numeric_column('price', shape=[1, 2]) + with ops.Graph().as_default(): + features = {'price': [[[1., 2.]], [[5., 6.]]]} + predictions = fc_old.linear_model(features, [price]) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + self.assertAllClose([[0.], [0.]], price_var.eval()) + self.assertAllClose([[0.], [0.]], predictions.eval()) + sess.run(price_var.assign([[10.], [100.]])) + self.assertAllClose([[210.], [650.]], predictions.eval()) + + def test_dense_multi_column(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} + predictions = fc_old.linear_model(features, [price1, price2]) + bias = get_linear_model_bias() + price1_var = 
get_linear_model_column_var(price1) + price2_var = get_linear_model_column_var(price2) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + self.assertAllClose([[0.], [0.]], price1_var.eval()) + self.assertAllClose([[0.]], price2_var.eval()) + self.assertAllClose([[0.], [0.]], predictions.eval()) + sess.run(price1_var.assign([[10.], [100.]])) + sess.run(price2_var.assign([[1000.]])) + sess.run(bias.assign([7.])) + self.assertAllClose([[3217.], [4657.]], predictions.eval()) + + def test_fills_cols_to_vars(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} + cols_to_vars = {} + fc_old.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars) + bias = get_linear_model_bias() + price1_var = get_linear_model_column_var(price1) + price2_var = get_linear_model_column_var(price2) + self.assertAllEqual(cols_to_vars['bias'], [bias]) + self.assertAllEqual(cols_to_vars[price1], [price1_var]) + self.assertAllEqual(cols_to_vars[price2], [price2_var]) + + def test_fills_cols_to_vars_partitioned_variables(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2', shape=3) + with ops.Graph().as_default(): + features = { + 'price1': [[1., 2.], [6., 7.]], + 'price2': [[3., 4., 5.], [8., 9., 10.]] + } + cols_to_vars = {} + with variable_scope.variable_scope( + 'linear', + partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)): + fc_old.linear_model( + features, [price1, price2], cols_to_vars=cols_to_vars) + with _initialized_session(): + self.assertEqual([0.], cols_to_vars['bias'][0].eval()) + # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables. 
+ self.assertAllEqual([[0.]], cols_to_vars[price1][0].eval()) + self.assertAllEqual([[0.]], cols_to_vars[price1][1].eval()) + # Partitioning shards the [3, 1] price2 var into a [2, 1] Variable and + # a [1, 1] Variable. + self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval()) + self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval()) + + def test_fills_cols_to_output_tensors(self): + # Provide three _DenseColumn's to input_layer: a _NumericColumn, a + # _BucketizedColumn, and an _EmbeddingColumn. Only the _EmbeddingColumn + # creates a Variable. + apple_numeric_column = fc.numeric_column('apple_numeric_column') + banana_dense_feature = fc.numeric_column('banana_dense_feature') + banana_dense_feature_bucketized = fc.bucketized_column( + banana_dense_feature, boundaries=[0.]) + cherry_sparse_column = fc.categorical_column_with_hash_bucket( + 'cherry_sparse_feature', hash_bucket_size=5) + dragonfruit_embedding_column = fc.embedding_column( + cherry_sparse_column, dimension=10) + with ops.Graph().as_default(): + features = { + 'apple_numeric_column': [[3.], [4.]], + 'banana_dense_feature': [[-1.], [4.]], + 'cherry_sparse_feature': [['a'], ['x']], + } + cols_to_output_tensors = {} + all_cols = [ + apple_numeric_column, banana_dense_feature_bucketized, + dragonfruit_embedding_column + ] + input_layer = fc_old.input_layer( + features, all_cols, cols_to_output_tensors=cols_to_output_tensors) + + # We check the mapping by checking that we have the right keys, + # and that the values (output_tensors) were indeed the ones used to + # form the input layer. 
+ self.assertItemsEqual(all_cols, cols_to_output_tensors.keys()) + input_layer_inputs = [tensor for tensor in input_layer.op.inputs[:-1]] + output_tensors = [tensor for tensor in cols_to_output_tensors.values()] + self.assertItemsEqual(input_layer_inputs, output_tensors) + + def test_dense_collection(self): + price = fc.numeric_column('price') + with ops.Graph().as_default() as g: + features = {'price': [[1.], [5.]]} + fc_old.linear_model(features, [price], weight_collections=['my-vars']) + my_vars = g.get_collection('my-vars') + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + self.assertIn(bias, my_vars) + self.assertIn(price_var, my_vars) + + def test_sparse_collection(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default() as g: + wire_tensor = sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + features = {'wire_cast': wire_tensor} + fc_old.linear_model(features, [wire_cast], weight_collections=['my-vars']) + my_vars = g.get_collection('my-vars') + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + self.assertIn(bias, my_vars) + self.assertIn(wire_cast_var, my_vars) + + def test_dense_trainable_default(self): + price = fc.numeric_column('price') + with ops.Graph().as_default() as g: + features = {'price': [[1.], [5.]]} + fc_old.linear_model(features, [price]) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertIn(bias, trainable_vars) + self.assertIn(price_var, trainable_vars) + + def test_sparse_trainable_default(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default() as g: + wire_tensor = sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + features = {'wire_cast': wire_tensor} + 
fc_old.linear_model(features, [wire_cast]) + trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + self.assertIn(bias, trainable_vars) + self.assertIn(wire_cast_var, trainable_vars) + + def test_dense_trainable_false(self): + price = fc.numeric_column('price') + with ops.Graph().as_default() as g: + features = {'price': [[1.], [5.]]} + fc_old.linear_model(features, [price], trainable=False) + trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertEqual([], trainable_vars) + + def test_sparse_trainable_false(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default() as g: + wire_tensor = sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + features = {'wire_cast': wire_tensor} + fc_old.linear_model(features, [wire_cast], trainable=False) + trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertEqual([], trainable_vars) + + def test_column_order(self): + price_a = fc.numeric_column('price_a') + price_b = fc.numeric_column('price_b') + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default() as g: + features = { + 'price_a': [[1.]], + 'price_b': [[3.]], + 'wire_cast': + sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + } + fc_old.linear_model( + features, [price_a, wire_cast, price_b], + weight_collections=['my-vars']) + my_vars = g.get_collection('my-vars') + self.assertIn('price_a', my_vars[0].name) + self.assertIn('price_b', my_vars[1].name) + self.assertIn('wire_cast', my_vars[2].name) + + with ops.Graph().as_default() as g: + features = { + 'price_a': [[1.]], + 'price_b': [[3.]], + 'wire_cast': + sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + } + fc_old.linear_model( + features, [wire_cast, price_b, 
price_a], + weight_collections=['my-vars']) + my_vars = g.get_collection('my-vars') + self.assertIn('price_a', my_vars[0].name) + self.assertIn('price_b', my_vars[1].name) + self.assertIn('wire_cast', my_vars[2].name) + + def test_static_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': [[1.], [5.], [7.]], # batchsize = 3 + 'price2': [[3.], [4.]] # batchsize = 2 + } + with self.assertRaisesRegexp( + ValueError, + 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string + fc_old.linear_model(features, [price1, price2]) + + def test_subset_of_static_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + price3 = fc.numeric_column('price3') + with ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 + 'price2': [[3.], [4.]], # batchsize = 2 + 'price3': [[3.], [4.], [5.]] # batchsize = 3 + } + with self.assertRaisesRegexp( + ValueError, + 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string + fc_old.linear_model(features, [price1, price2, price3]) + + def test_runtime_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 + 'price2': [[3.], [4.]] # batchsize = 2 + } + predictions = fc_old.linear_model(features, [price1, price2]) + with _initialized_session() as sess: + with self.assertRaisesRegexp(errors.OpError, + 'must have the same size and shape'): + sess.run( + predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]}) + + def test_runtime_batch_size_matches(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with 
ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 + 'price2': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 + } + predictions = fc_old.linear_model(features, [price1, price2]) + with _initialized_session() as sess: + sess.run( + predictions, + feed_dict={ + features['price1']: [[1.], [5.]], + features['price2']: [[1.], [5.]], + }) + + def test_with_1d_sparse_tensor(self): + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column( + price, boundaries=[ + 0., + 10., + 100., + ]) + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + + # Provides 1-dim tensor and dense tensor. + features = { + 'price': + constant_op.constant([ + -1., + 12., + ]), + 'body-style': + sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), + } + self.assertEqual(1, features['price'].shape.ndims) + self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) + + net = fc_old.linear_model(features, [price_buckets, body_style]) + with _initialized_session() as sess: + bias = get_linear_model_bias() + price_buckets_var = get_linear_model_column_var(price_buckets) + body_style_var = get_linear_model_column_var(body_style) + + sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) + sess.run(bias.assign([5.])) + + self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net)) + + def test_with_1d_unknown_shape_sparse_tensor(self): + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column( + price, boundaries=[ + 0., + 10., + 100., + ]) + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + country = fc.categorical_column_with_vocabulary_list( + 'country', vocabulary_list=['US', 'JP', 'CA']) + + # Provides 
1-dim tensor and dense tensor. + features = { + 'price': array_ops.placeholder(dtypes.float32), + 'body-style': array_ops.sparse_placeholder(dtypes.string), + 'country': array_ops.placeholder(dtypes.string), + } + self.assertIsNone(features['price'].shape.ndims) + self.assertIsNone(features['body-style'].get_shape().ndims) + + price_data = np.array([-1., 12.]) + body_style_data = sparse_tensor.SparseTensorValue( + indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) + country_data = np.array(['US', 'CA']) + + net = fc_old.linear_model(features, [price_buckets, body_style, country]) + bias = get_linear_model_bias() + price_buckets_var = get_linear_model_column_var(price_buckets) + body_style_var = get_linear_model_column_var(body_style) + with _initialized_session() as sess: + sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) + sess.run(bias.assign([5.])) + + self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], + sess.run( + net, + feed_dict={ + features['price']: price_data, + features['body-style']: body_style_data, + features['country']: country_data + })) + + def test_with_rank_0_feature(self): + price = fc.numeric_column('price') + features = { + 'price': constant_op.constant(0), + } + self.assertEqual(0, features['price'].shape.ndims) + + # Static rank 0 should fail + with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): + fc_old.linear_model(features, [price]) + + # Dynamic rank 0 should fail + features = { + 'price': array_ops.placeholder(dtypes.float32), + } + net = fc_old.linear_model(features, [price]) + self.assertEqual(1, net.shape[1]) + with _initialized_session() as sess: + with self.assertRaisesOpError('Feature .* cannot have rank 0'): + sess.run(net, feed_dict={features['price']: np.array(1)}) + + def test_multiple_linear_models(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features1 = 
{'price': [[1.], [5.]]} + features2 = {'price': [[2.], [10.]]} + predictions1 = fc_old.linear_model(features1, [price]) + predictions2 = fc_old.linear_model(features2, [price]) + bias1 = get_linear_model_bias(name='linear_model') + bias2 = get_linear_model_bias(name='linear_model_1') + price_var1 = get_linear_model_column_var(price, name='linear_model') + price_var2 = get_linear_model_column_var(price, name='linear_model_1') + with _initialized_session() as sess: + self.assertAllClose([0.], bias1.eval()) + sess.run(price_var1.assign([[10.]])) + sess.run(bias1.assign([5.])) + self.assertAllClose([[15.], [55.]], predictions1.eval()) + self.assertAllClose([0.], bias2.eval()) + sess.run(price_var2.assign([[10.]])) + sess.run(bias2.assign([5.])) + self.assertAllClose([[25.], [105.]], predictions2.eval()) + + def test_linear_model_v1_shared_embedding_all_other_v2(self): + price = fc.numeric_column('price') # v2 + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) # v2 + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) # v2 + categorical_column_a = fc_old.categorical_column_with_identity( + key='aaa', num_buckets=3) # v2 + categorical_column_b = fc_old.categorical_column_with_identity( + key='bbb', num_buckets=3) # v2 + shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) # v1 + all_cols = [ + price, some_embedding_column, shared_embedding_a, shared_embedding_b + ] + + with ops.Graph().as_default(): + features = { + 'price': [[3.], [4.]], + 'sparse_feature': [['a'], ['x']], + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + fc_old.linear_model(features, all_cols) + bias = get_linear_model_bias() + with 
_initialized_session(): + self.assertAllClose([0.], bias.eval()) + + def test_linear_model_v1_shared_embedding_with_v2_cat_all_other_v2(self): + price = fc.numeric_column('price') # v2 + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) # v2 + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) # v2 + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) # v2 + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) # v2 + shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) # v1 + all_cols = [ + price, some_embedding_column, shared_embedding_a, shared_embedding_b + ] + + with ops.Graph().as_default(): + features = { + 'price': [[3.], [4.]], + 'sparse_feature': [['a'], ['x']], + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + fc_old.linear_model(features, all_cols) + bias = get_linear_model_bias() + with _initialized_session(): + self.assertAllClose([0.], bias.eval()) + + def test_linear_model_v1_v2_mix(self): + price = fc.numeric_column('price') # v2 + some_sparse_column = fc_old.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) # v1 + some_embedding_column = fc_old.embedding_column( + some_sparse_column, dimension=10) # v1 + categorical_column_a = fc_old.categorical_column_with_identity( + key='aaa', num_buckets=3) # v2 + categorical_column_b = fc_old.categorical_column_with_identity( + key='bbb', num_buckets=3) # v2 + shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) # v1 + all_cols = [ + price, some_embedding_column, shared_embedding_a, 
shared_embedding_b + ] + + with ops.Graph().as_default(): + features = { + 'price': [[3.], [4.]], + 'sparse_feature': [['a'], ['x']], + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + fc_old.linear_model(features, all_cols) + bias = get_linear_model_bias() + with _initialized_session(): + self.assertAllClose([0.], bias.eval()) + + def test_linear_model_v2_shared_embedding_all_other_v1(self): + price = fc_old.numeric_column('price') # v1 + some_sparse_column = fc_old.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) # v1 + some_embedding_column = fc_old.embedding_column( + some_sparse_column, dimension=10) # v1 + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) # v2 + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) # v2 + shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], dimension=2) # v2 + all_cols = [ + price, some_embedding_column, shared_embedding_a, shared_embedding_b + ] + + with ops.Graph().as_default(): + features = { + 'price': [[3.], [4.]], + 'sparse_feature': [['a'], ['x']], + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + with self.assertRaisesRegexp(ValueError, + 'SharedEmbeddingColumns are not supported'): + fc_old.linear_model(features, all_cols) + + +class FeatureLayerTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def test_retrieving_input(self): + features = {'a': [0.]} + feature_layer = fc.FeatureLayer(fc.numeric_column('a')) + inputs = self.evaluate(feature_layer(features)) 
+ self.assertAllClose([[0.]], inputs) + + def test_reuses_variables(self): + with context.eager_mode(): + sparse_input = sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (2, 0)), + values=(0, 1, 2), + dense_shape=(3, 3)) + + # Create feature columns (categorical and embedding). + categorical_column = fc.categorical_column_with_identity( + key='a', num_buckets=3) + embedding_dimension = 2 + def _embedding_column_initializer(shape, dtype, partition_info): + del shape # unused + del dtype # unused + del partition_info # unused + embedding_values = ( + (1, 0), # id 0 + (0, 1), # id 1 + (1, 1)) # id 2 + return embedding_values + + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_embedding_column_initializer) + + feature_layer = fc.FeatureLayer([embedding_column]) + features = {'a': sparse_input} + + inputs = feature_layer(features) + variables = feature_layer.variables + + # Sanity check: test that the inputs are correct. + self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs) + + # Check that only one variable was created. + self.assertEqual(1, len(variables)) + + # Check that invoking feature_layer on the same features does not create + # additional variables + _ = feature_layer(features) + self.assertEqual(1, len(variables)) + self.assertEqual(variables[0], feature_layer.variables[0]) + + def test_feature_column_feature_layer_gradient(self): + with context.eager_mode(): + sparse_input = sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (2, 0)), + values=(0, 1, 2), + dense_shape=(3, 3)) + + # Create feature columns (categorical and embedding). 
+ categorical_column = fc.categorical_column_with_identity( + key='a', num_buckets=3) + embedding_dimension = 2 + + def _embedding_column_initializer(shape, dtype, partition_info): + del shape # unused + del dtype # unused + del partition_info # unused + embedding_values = ( + (1, 0), # id 0 + (0, 1), # id 1 + (1, 1)) # id 2 + return embedding_values + + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_embedding_column_initializer) + + feature_layer = fc.FeatureLayer([embedding_column]) + features = {'a': sparse_input} + + def scale_matrix(): + matrix = feature_layer(features) + return 2 * matrix + + # Sanity check: Verify that scale_matrix returns the correct output. + self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix()) + + # Check that the returned gradient is correct. + grad_function = backprop.implicit_grad(scale_matrix) + grads_and_vars = grad_function() + indexed_slice = grads_and_vars[0][0] + gradient = grads_and_vars[0][0].values + + self.assertAllEqual([0, 1, 2], indexed_slice.indices) + self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient) + + def test_raises_if_empty_feature_columns(self): + with self.assertRaisesRegexp(ValueError, + 'feature_columns must not be empty'): + fc.FeatureLayer(feature_columns=[])(features={}) + + def test_should_be_dense_column(self): + with self.assertRaisesRegexp(ValueError, 'must be a DenseColumn'): + fc.FeatureLayer(feature_columns=[ + fc.categorical_column_with_hash_bucket('wire_cast', 4) + ])( + features={ + 'a': [[0]] + }) + + def test_does_not_support_dict_columns(self): + with self.assertRaisesRegexp( + ValueError, 'Expected feature_columns to be iterable, found dict.'): + fc.FeatureLayer(feature_columns={'a': fc.numeric_column('a')})( + features={ + 'a': [[0]] + }) + + def test_bare_column(self): + with ops.Graph().as_default(): + features = features = {'a': [0.]} + net = fc.FeatureLayer(fc.numeric_column('a'))(features) + with 
_initialized_session(): + self.assertAllClose([[0.]], net.eval()) + + def test_column_generator(self): + with ops.Graph().as_default(): + features = features = {'a': [0.], 'b': [1.]} + columns = (fc.numeric_column(key) for key in features) + net = fc.FeatureLayer(columns)(features) + with _initialized_session(): + self.assertAllClose([[0., 1.]], net.eval()) + + def test_raises_if_duplicate_name(self): + with self.assertRaisesRegexp( + ValueError, 'Duplicate feature column name found for columns'): + fc.FeatureLayer( + feature_columns=[fc.numeric_column('a'), + fc.numeric_column('a')])( + features={ + 'a': [[0]] + }) + + def test_one_column(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + net = fc.FeatureLayer([price])(features) + with _initialized_session(): + self.assertAllClose([[1.], [5.]], net.eval()) + + def test_multi_dimension(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1., 2.], [5., 6.]]} + net = fc.FeatureLayer([price])(features) + with _initialized_session(): + self.assertAllClose([[1., 2.], [5., 6.]], net.eval()) + + def test_compute_output_shape(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2', shape=4) + with ops.Graph().as_default(): + features = { + 'price1': [[1., 2.], [5., 6.]], + 'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]] + } + feature_layer = fc.FeatureLayer([price1, price2]) + self.assertEqual((None, 6), feature_layer.compute_output_shape((None,))) + net = feature_layer(features) + with _initialized_session(): + self.assertAllClose( + [[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]], net.eval()) + + def test_raises_if_shape_mismatch(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + with self.assertRaisesRegexp( + Exception, + r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): 
+ fc.FeatureLayer([price])(features) + + def test_reshaping(self): + price = fc.numeric_column('price', shape=[1, 2]) + with ops.Graph().as_default(): + features = {'price': [[[1., 2.]], [[5., 6.]]]} + net = fc.FeatureLayer([price])(features) + with _initialized_session(): + self.assertAllClose([[1., 2.], [5., 6.]], net.eval()) + + def test_multi_column(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': [[1., 2.], [5., 6.]], + 'price2': [[3.], [4.]] + } + net = fc.FeatureLayer([price1, price2])(features) + with _initialized_session(): + self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval()) + + def test_cols_to_output_tensors(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + cols_dict = {} + features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} + feature_layer = fc.FeatureLayer([price1, price2]) + net = feature_layer(features, cols_dict) + with _initialized_session(): + self.assertAllClose([[1., 2.], [5., 6.]], cols_dict[price1].eval()) + self.assertAllClose([[3.], [4.]], cols_dict[price2].eval()) + self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval()) + + def test_column_order(self): + price_a = fc.numeric_column('price_a') + price_b = fc.numeric_column('price_b') + with ops.Graph().as_default(): + features = { + 'price_a': [[1.]], + 'price_b': [[3.]], + } + net1 = fc.FeatureLayer([price_a, price_b])(features) + net2 = fc.FeatureLayer([price_b, price_a])(features) + with _initialized_session(): + self.assertAllClose([[1., 3.]], net1.eval()) + self.assertAllClose([[1., 3.]], net2.eval()) + + def test_fails_for_categorical_column(self): + animal = fc.categorical_column_with_identity('animal', num_buckets=4) + with ops.Graph().as_default(): + features = { + 'animal': + sparse_tensor.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) + 
} + with self.assertRaisesRegexp(Exception, 'must be a DenseColumn'): + fc.FeatureLayer([animal])(features) + + def test_static_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': [[1.], [5.], [7.]], # batchsize = 3 + 'price2': [[3.], [4.]] # batchsize = 2 + } + with self.assertRaisesRegexp( + ValueError, + 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string + fc.FeatureLayer([price1, price2])(features) + + def test_subset_of_static_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + price3 = fc.numeric_column('price3') + with ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 + 'price2': [[3.], [4.]], # batchsize = 2 + 'price3': [[3.], [4.], [5.]] # batchsize = 3 + } + with self.assertRaisesRegexp( + ValueError, + 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string + fc.FeatureLayer([price1, price2, price3])(features) + + def test_runtime_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 + 'price2': [[3.], [4.]] # batchsize = 2 + } + net = fc.FeatureLayer([price1, price2])(features) + with _initialized_session() as sess: + with self.assertRaisesRegexp(errors.OpError, + 'Dimensions of inputs should match'): + sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]}) + + def test_runtime_batch_size_matches(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 + 'price2': 
array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 + } + net = fc.FeatureLayer([price1, price2])(features) + with _initialized_session() as sess: + sess.run( + net, + feed_dict={ + features['price1']: [[1.], [5.]], + features['price2']: [[1.], [5.]], + }) + + def test_multiple_layers_with_same_embedding_column(self): + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) + + with ops.Graph().as_default(): + features = { + 'sparse_feature': [['a'], ['x']], + } + all_cols = [some_embedding_column] + fc.FeatureLayer(all_cols)(features) + fc.FeatureLayer(all_cols)(features) + # Make sure that 2 variables get created in this case. + self.assertEqual(2, len( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + expected_var_names = [ + 'feature_layer/sparse_feature_embedding/embedding_weights:0', + 'feature_layer_1/sparse_feature_embedding/embedding_weights:0' + ] + self.assertItemsEqual( + expected_var_names, + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + + def test_multiple_layers_with_same_shared_embedding_column(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + shared_state_manager = fc.SharedEmbeddingStateManager( + name='shared_feature_layer') + + with ops.Graph().as_default(): + features = { + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + all_cols = [embedding_column_a, embedding_column_b] + 
fc.FeatureLayer( + all_cols, shared_state_manager=shared_state_manager)( + features) + fc.FeatureLayer( + all_cols, shared_state_manager=shared_state_manager)( + features) + # Make sure that only 1 variable gets created in this case. + self.assertEqual(1, len( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + self.assertItemsEqual( + ['shared_feature_layer/aaa_bbb_shared_embedding:0'], + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + + def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + all_cols = [embedding_column_a, embedding_column_b] + + with ops.Graph().as_default(): + shared_state_manager1 = fc.SharedEmbeddingStateManager( + name='shared_feature_layer') + features = { + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + fc.FeatureLayer( + all_cols, shared_state_manager=shared_state_manager1)( + features) + # Make sure that only 1 variable gets created in this case. 
+ self.assertEqual(1, len( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + + with ops.Graph().as_default(): + shared_state_manager2 = fc.SharedEmbeddingStateManager( + name='shared_feature_layer') + features1 = { + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + + fc.FeatureLayer( + all_cols, shared_state_manager=shared_state_manager2)( + features1) + # Make sure that only 1 variable gets created in this case. + self.assertEqual(1, len( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + self.assertItemsEqual( + ['shared_feature_layer/aaa_bbb_shared_embedding:0'], + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + + def test_with_numpy_input_fn(self): + embedding_values = ( + (1., 2., 3., 4., 5.), # id 0 + (6., 7., 8., 9., 10.), # id 1 + (11., 12., 13., 14., 15.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + del shape, dtype, partition_info + return embedding_values + + # price has 1 dimension in feature_layer + price = fc.numeric_column('price') + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + # one_hot_body_style has 3 dims in feature_layer. + one_hot_body_style = fc.indicator_column(body_style) + # embedded_body_style has 5 dims in feature_layer. 
+ embedded_body_style = fc.embedding_column( + body_style, dimension=5, initializer=_initializer) + + input_fn = numpy_io.numpy_input_fn( + x={ + 'price': np.array([11., 12., 13., 14.]), + 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), + }, + batch_size=2, + shuffle=False) + features = input_fn() + net = fc.FeatureLayer([price, one_hot_body_style, embedded_body_style])( + features) + self.assertEqual(1 + 3 + 5, net.shape[1]) + with _initialized_session() as sess: + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess, coord=coord) + + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. + self.assertAllEqual( + [[11., 12., 13., 14., 15., 0., 0., 1., 11.], + [1., 2., 3., 4., 5., 1., 0., 0., 12]], + sess.run(net)) + + coord.request_stop() + coord.join(threads) + + def test_with_1d_sparse_tensor(self): + embedding_values = ( + (1., 2., 3., 4., 5.), # id 0 + (6., 7., 8., 9., 10.), # id 1 + (11., 12., 13., 14., 15.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + del shape, dtype, partition_info + return embedding_values + + # price has 1 dimension in feature_layer + price = fc.numeric_column('price') + + # one_hot_body_style has 3 dims in feature_layer. + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + one_hot_body_style = fc.indicator_column(body_style) + + # embedded_body_style has 5 dims in feature_layer. + country = fc.categorical_column_with_vocabulary_list( + 'country', vocabulary_list=['US', 'JP', 'CA']) + embedded_country = fc.embedding_column( + country, dimension=5, initializer=_initializer) + + # Provides 1-dim tensor and dense tensor. 
+ features = { + 'price': constant_op.constant([11., 12.,]), + 'body-style': sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), + # This is dense tensor for the categorical_column. + 'country': constant_op.constant(['CA', 'US']), + } + self.assertEqual(1, features['price'].shape.ndims) + self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) + self.assertEqual(1, features['country'].shape.ndims) + + net = fc.FeatureLayer([price, one_hot_body_style, embedded_country])( + features) + self.assertEqual(1 + 3 + 5, net.shape[1]) + with _initialized_session() as sess: + + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. + self.assertAllEqual( + [[0., 0., 1., 11., 12., 13., 14., 15., 11.], + [1., 0., 0., 1., 2., 3., 4., 5., 12.]], + sess.run(net)) + + def test_with_1d_unknown_shape_sparse_tensor(self): + embedding_values = ( + (1., 2.), # id 0 + (6., 7.), # id 1 + (11., 12.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + del shape, dtype, partition_info + return embedding_values + + # price has 1 dimension in feature_layer + price = fc.numeric_column('price') + + # one_hot_body_style has 3 dims in feature_layer. + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + one_hot_body_style = fc.indicator_column(body_style) + + # embedded_body_style has 5 dims in feature_layer. + country = fc.categorical_column_with_vocabulary_list( + 'country', vocabulary_list=['US', 'JP', 'CA']) + embedded_country = fc.embedding_column( + country, dimension=2, initializer=_initializer) + + # Provides 1-dim tensor and dense tensor. + features = { + 'price': array_ops.placeholder(dtypes.float32), + 'body-style': array_ops.sparse_placeholder(dtypes.string), + # This is dense tensor for the categorical_column. 
+ 'country': array_ops.placeholder(dtypes.string), + } + self.assertIsNone(features['price'].shape.ndims) + self.assertIsNone(features['body-style'].get_shape().ndims) + self.assertIsNone(features['country'].shape.ndims) + + price_data = np.array([11., 12.]) + body_style_data = sparse_tensor.SparseTensorValue( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)) + country_data = np.array([['US'], ['CA']]) + + net = fc.FeatureLayer([price, one_hot_body_style, embedded_country])( + features) + self.assertEqual(1 + 3 + 2, net.shape[1]) + with _initialized_session() as sess: + + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. + self.assertAllEqual( + [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]], + sess.run( + net, + feed_dict={ + features['price']: price_data, + features['body-style']: body_style_data, + features['country']: country_data + })) + + def test_with_rank_0_feature(self): + # price has 1 dimension in feature_layer + price = fc.numeric_column('price') + features = { + 'price': constant_op.constant(0), + } + self.assertEqual(0, features['price'].shape.ndims) + + # Static rank 0 should fail + with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): + fc.FeatureLayer([price])(features) + + # Dynamic rank 0 should fail + features = { + 'price': array_ops.placeholder(dtypes.float32), + } + net = fc.FeatureLayer([price])(features) + self.assertEqual(1, net.shape[1]) + with _initialized_session() as sess: + with self.assertRaisesOpError('Feature .* cannot have rank 0'): + sess.run(net, feed_dict={features['price']: np.array(1)}) + + +class InputLayerTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes + def test_retrieving_input(self): + features = {'a': [0.]} + input_layer = fc_old.InputLayer(fc.numeric_column('a')) + inputs = self.evaluate(input_layer(features)) + self.assertAllClose([[0.]], inputs) + + def test_reuses_variables(self): + 
with context.eager_mode(): + sparse_input = sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (2, 0)), + values=(0, 1, 2), + dense_shape=(3, 3)) + + # Create feature columns (categorical and embedding). + categorical_column = fc.categorical_column_with_identity( + key='a', num_buckets=3) + embedding_dimension = 2 + + def _embedding_column_initializer(shape, dtype, partition_info): + del shape # unused + del dtype # unused + del partition_info # unused + embedding_values = ( + (1, 0), # id 0 + (0, 1), # id 1 + (1, 1)) # id 2 + return embedding_values + + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_embedding_column_initializer) + + input_layer = fc_old.InputLayer([embedding_column]) + features = {'a': sparse_input} + + inputs = input_layer(features) + variables = input_layer.variables + + # Sanity check: test that the inputs are correct. + self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs) # Check that only one variable was created. 
self.assertEqual(1, len(variables)) - # Check that invoking feature_layer on the same features does not create + # Check that invoking input_layer on the same features does not create # additional variables - _ = feature_layer(features) + _ = input_layer(features) self.assertEqual(1, len(variables)) - self.assertEqual(variables[0], feature_layer.variables[0]) + self.assertEqual(variables[0], input_layer.variables[0]) - def test_feature_column_feature_layer_gradient(self): + def test_feature_column_input_layer_gradient(self): with context.eager_mode(): sparse_input = sparse_tensor.SparseTensor( indices=((0, 0), (1, 0), (2, 0)), @@ -1815,11 +3589,11 @@ class FeatureLayerTest(test.TestCase): dimension=embedding_dimension, initializer=_embedding_column_initializer) - feature_layer = FeatureLayer([embedding_column]) + input_layer = fc_old.InputLayer([embedding_column]) features = {'a': sparse_input} def scale_matrix(): - matrix = feature_layer(features) + matrix = input_layer(features) return 2 * matrix # Sanity check: Verify that scale_matrix returns the correct output. 
@@ -1834,32 +3608,32 @@ class FeatureLayerTest(test.TestCase): self.assertAllEqual([0, 1, 2], indexed_slice.indices) self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient) + +class FunctionalInputLayerTest(test.TestCase): + def test_raises_if_empty_feature_columns(self): with self.assertRaisesRegexp(ValueError, 'feature_columns must not be empty'): - FeatureLayer(feature_columns=[])(features={}) + fc_old.input_layer(features={}, feature_columns=[]) def test_should_be_dense_column(self): - with self.assertRaisesRegexp(ValueError, 'must be a DenseColumn'): - FeatureLayer(feature_columns=[ - fc.categorical_column_with_hash_bucket('wire_cast', 4) - ])( - features={ - 'a': [[0]] - }) + with self.assertRaisesRegexp(ValueError, 'must be a _DenseColumn'): + fc_old.input_layer( + features={'a': [[0]]}, + feature_columns=[ + fc.categorical_column_with_hash_bucket('wire_cast', 4) + ]) def test_does_not_support_dict_columns(self): with self.assertRaisesRegexp( ValueError, 'Expected feature_columns to be iterable, found dict.'): - FeatureLayer(feature_columns={'a': fc.numeric_column('a')})( - features={ - 'a': [[0]] - }) + fc_old.input_layer( + features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')}) def test_bare_column(self): with ops.Graph().as_default(): features = features = {'a': [0.]} - net = FeatureLayer(fc.numeric_column('a'))(features) + net = fc_old.input_layer(features, fc.numeric_column('a')) with _initialized_session(): self.assertAllClose([[0.]], net.eval()) @@ -1867,25 +3641,23 @@ class FeatureLayerTest(test.TestCase): with ops.Graph().as_default(): features = features = {'a': [0.], 'b': [1.]} columns = (fc.numeric_column(key) for key in features) - net = FeatureLayer(columns)(features) + net = fc_old.input_layer(features, columns) with _initialized_session(): self.assertAllClose([[0., 1.]], net.eval()) def test_raises_if_duplicate_name(self): with self.assertRaisesRegexp( ValueError, 'Duplicate feature column name found for columns'): - 
FeatureLayer( + fc_old.input_layer( + features={'a': [[0]]}, feature_columns=[fc.numeric_column('a'), - fc.numeric_column('a')])( - features={ - 'a': [[0]] - }) + fc.numeric_column('a')]) def test_one_column(self): price = fc.numeric_column('price') with ops.Graph().as_default(): features = {'price': [[1.], [5.]]} - net = FeatureLayer([price])(features) + net = fc_old.input_layer(features, [price]) with _initialized_session(): self.assertAllClose([[1.], [5.]], net.eval()) @@ -1893,25 +3665,10 @@ class FeatureLayerTest(test.TestCase): price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): features = {'price': [[1., 2.], [5., 6.]]} - net = FeatureLayer([price])(features) + net = fc_old.input_layer(features, [price]) with _initialized_session(): self.assertAllClose([[1., 2.], [5., 6.]], net.eval()) - def test_compute_output_shape(self): - price1 = fc.numeric_column('price1', shape=2) - price2 = fc.numeric_column('price2', shape=4) - with ops.Graph().as_default(): - features = { - 'price1': [[1., 2.], [5., 6.]], - 'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]] - } - feature_layer = FeatureLayer([price1, price2]) - self.assertEqual((None, 6), feature_layer.compute_output_shape((None,))) - net = feature_layer(features) - with _initialized_session(): - self.assertAllClose( - [[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]], net.eval()) - def test_raises_if_shape_mismatch(self): price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): @@ -1919,13 +3676,13 @@ class FeatureLayerTest(test.TestCase): with self.assertRaisesRegexp( Exception, r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): - FeatureLayer([price])(features) + fc_old.input_layer(features, [price]) def test_reshaping(self): price = fc.numeric_column('price', shape=[1, 2]) with ops.Graph().as_default(): features = {'price': [[[1., 2.]], [[5., 6.]]]} - net = FeatureLayer([price])(features) + net = fc_old.input_layer(features, [price]) with 
_initialized_session(): self.assertAllClose([[1., 2.], [5., 6.]], net.eval()) @@ -1933,26 +3690,128 @@ class FeatureLayerTest(test.TestCase): price1 = fc.numeric_column('price1', shape=2) price2 = fc.numeric_column('price2') with ops.Graph().as_default(): - features = { - 'price1': [[1., 2.], [5., 6.]], - 'price2': [[3.], [4.]] - } - net = FeatureLayer([price1, price2])(features) + features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} + net = fc_old.input_layer(features, [price1, price2]) with _initialized_session(): self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval()) - def test_cols_to_output_tensors(self): - price1 = fc.numeric_column('price1', shape=2) - price2 = fc.numeric_column('price2') + def test_fills_cols_to_vars(self): + # Provide three _DenseColumn's to input_layer: a _NumericColumn, a + # _BucketizedColumn, and an _EmbeddingColumn. Only the _EmbeddingColumn + # creates a Variable. + price1 = fc.numeric_column('price1') + dense_feature = fc.numeric_column('dense_feature') + dense_feature_bucketized = fc.bucketized_column( + dense_feature, boundaries=[0.]) + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) with ops.Graph().as_default(): - cols_dict = {} - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - feature_layer = FeatureLayer([price1, price2]) - net = feature_layer(features, cols_dict) - with _initialized_session(): - self.assertAllClose([[1., 2.], [5., 6.]], cols_dict[price1].eval()) - self.assertAllClose([[3.], [4.]], cols_dict[price2].eval()) - self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval()) + features = { + 'price1': [[3.], [4.]], + 'dense_feature': [[-1.], [4.]], + 'sparse_feature': [['a'], ['x']], + } + cols_to_vars = {} + all_cols = [price1, dense_feature_bucketized, some_embedding_column] + fc_old.input_layer(features, all_cols, 
cols_to_vars=cols_to_vars) + self.assertItemsEqual(list(cols_to_vars.keys()), all_cols) + self.assertEqual(0, len(cols_to_vars[price1])) + self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized])) + self.assertEqual(1, len(cols_to_vars[some_embedding_column])) + self.assertIsInstance(cols_to_vars[some_embedding_column][0], + variables_lib.Variable) + self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10]) + + def test_fills_cols_to_vars_shared_embedding(self): + # Provide 5 DenseColumn's to input_layer: a NumericColumn, a + # BucketizedColumn, an EmbeddingColumn, two SharedEmbeddingColumns. The + # EmbeddingColumn creates a Variable and the two SharedEmbeddingColumns + # shared one variable. + price1 = fc.numeric_column('price1') + dense_feature = fc.numeric_column('dense_feature') + dense_feature_bucketized = fc.bucketized_column( + dense_feature, boundaries=[0.]) + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) + categorical_column_a = fc_old.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc_old.categorical_column_with_identity( + key='bbb', num_buckets=3) + shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) + with ops.Graph().as_default(): + features = { + 'price1': [[3.], [4.]], + 'dense_feature': [[-1.], [4.]], + 'sparse_feature': [['a'], ['x']], + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + cols_to_vars = {} + all_cols = [ + price1, dense_feature_bucketized, some_embedding_column, + shared_embedding_a, shared_embedding_b + ] + fc_old.input_layer(features, all_cols, 
cols_to_vars=cols_to_vars) + self.assertItemsEqual(list(cols_to_vars.keys()), all_cols) + self.assertEqual(0, len(cols_to_vars[price1])) + self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized])) + self.assertEqual(1, len(cols_to_vars[some_embedding_column])) + self.assertEqual(1, len(cols_to_vars[shared_embedding_a])) + # This is a bug in the current implementation and should be fixed in the + # new one. + self.assertEqual(0, len(cols_to_vars[shared_embedding_b])) + self.assertIsInstance(cols_to_vars[some_embedding_column][0], + variables_lib.Variable) + self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10]) + self.assertIsInstance(cols_to_vars[shared_embedding_a][0], + variables_lib.Variable) + self.assertAllEqual(cols_to_vars[shared_embedding_a][0].shape, [3, 2]) + + def test_fills_cols_to_vars_partitioned_variables(self): + price1 = fc.numeric_column('price1') + dense_feature = fc.numeric_column('dense_feature') + dense_feature_bucketized = fc.bucketized_column( + dense_feature, boundaries=[0.]) + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) + with ops.Graph().as_default(): + features = { + 'price1': [[3.], [4.]], + 'dense_feature': [[-1.], [4.]], + 'sparse_feature': [['a'], ['x']], + } + cols_to_vars = {} + all_cols = [price1, dense_feature_bucketized, some_embedding_column] + with variable_scope.variable_scope( + 'input_from_feature_columns', + partitioner=partitioned_variables.fixed_size_partitioner(3, axis=0)): + fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars) + self.assertItemsEqual(list(cols_to_vars.keys()), all_cols) + self.assertEqual(0, len(cols_to_vars[price1])) + self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized])) + self.assertEqual(3, len(cols_to_vars[some_embedding_column])) + self.assertEqual( + 
'input_from_feature_columns/input_layer/sparse_feature_embedding/' + 'embedding_weights/part_0:0', + cols_to_vars[some_embedding_column][0].name) + self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [2, 10]) + self.assertAllEqual(cols_to_vars[some_embedding_column][1].shape, [2, 10]) + self.assertAllEqual(cols_to_vars[some_embedding_column][2].shape, [1, 10]) def test_column_order(self): price_a = fc.numeric_column('price_a') @@ -1962,8 +3821,8 @@ class FeatureLayerTest(test.TestCase): 'price_a': [[1.]], 'price_b': [[3.]], } - net1 = FeatureLayer([price_a, price_b])(features) - net2 = FeatureLayer([price_b, price_a])(features) + net1 = fc_old.input_layer(features, [price_a, price_b]) + net2 = fc_old.input_layer(features, [price_b, price_a]) with _initialized_session(): self.assertAllClose([[1., 3.]], net1.eval()) self.assertAllClose([[1., 3.]], net2.eval()) @@ -1976,8 +3835,8 @@ class FeatureLayerTest(test.TestCase): sparse_tensor.SparseTensor( indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) } - with self.assertRaisesRegexp(Exception, 'must be a DenseColumn'): - FeatureLayer([animal])(features) + with self.assertRaisesRegexp(Exception, 'must be a _DenseColumn'): + fc_old.input_layer(features, [animal]) def test_static_batch_size_mismatch(self): price1 = fc.numeric_column('price1') @@ -1990,7 +3849,7 @@ class FeatureLayerTest(test.TestCase): with self.assertRaisesRegexp( ValueError, 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - FeatureLayer([price1, price2])(features) + fc_old.input_layer(features, [price1, price2]) def test_subset_of_static_batch_size_mismatch(self): price1 = fc.numeric_column('price1') @@ -2005,7 +3864,7 @@ class FeatureLayerTest(test.TestCase): with self.assertRaisesRegexp( ValueError, 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - FeatureLayer([price1, price2, price3])(features) + 
fc_old.input_layer(features, [price1, price2, price3]) def test_runtime_batch_size_mismatch(self): price1 = fc.numeric_column('price1') @@ -2015,7 +3874,7 @@ class FeatureLayerTest(test.TestCase): 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 'price2': [[3.], [4.]] # batchsize = 2 } - net = FeatureLayer([price1, price2])(features) + net = fc_old.input_layer(features, [price1, price2]) with _initialized_session() as sess: with self.assertRaisesRegexp(errors.OpError, 'Dimensions of inputs should match'): @@ -2029,7 +3888,7 @@ class FeatureLayerTest(test.TestCase): 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 'price2': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 } - net = FeatureLayer([price1, price2])(features) + net = fc_old.input_layer(features, [price1, price2]) with _initialized_session() as sess: sess.run( net, @@ -2049,181 +3908,39 @@ class FeatureLayerTest(test.TestCase): 'sparse_feature': [['a'], ['x']], } all_cols = [some_embedding_column] - FeatureLayer(all_cols)(features) - FeatureLayer(all_cols)(features) - # Make sure that 2 variables get created in this case. 
- self.assertEqual(2, len( - ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) - expected_var_names = [ - 'feature_layer/sparse_feature_embedding/embedding_weights:0', - 'feature_layer_1/sparse_feature_embedding/embedding_weights:0' - ] - self.assertItemsEqual( - expected_var_names, - [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - - def test_multiple_layers_with_same_shared_embedding_column(self): - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - shared_state_manager = fc.SharedEmbeddingStateManager( - name='shared_feature_layer') - - with ops.Graph().as_default(): - features = { - 'aaa': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - all_cols = [embedding_column_a, embedding_column_b] - FeatureLayer( - all_cols, shared_state_manager=shared_state_manager)( - features) - FeatureLayer( - all_cols, shared_state_manager=shared_state_manager)( - features) - # Make sure that only 1 variable gets created in this case. 
- self.assertEqual(1, len( - ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) - self.assertItemsEqual( - ['shared_feature_layer/aaa_bbb_shared_embedding:0'], - [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - - def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self): - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - all_cols = [embedding_column_a, embedding_column_b] - - with ops.Graph().as_default(): - shared_state_manager1 = fc.SharedEmbeddingStateManager( - name='shared_feature_layer') - features = { - 'aaa': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - FeatureLayer( - all_cols, shared_state_manager=shared_state_manager1)( - features) - # Make sure that only 1 variable gets created in this case. - self.assertEqual(1, len( - ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) - - with ops.Graph().as_default(): - shared_state_manager2 = fc.SharedEmbeddingStateManager( - name='shared_feature_layer') - features1 = { - 'aaa': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - - FeatureLayer( - all_cols, shared_state_manager=shared_state_manager2)( - features1) - # Make sure that only 1 variable gets created in this case. 
- self.assertEqual(1, len( + fc_old.input_layer(features, all_cols) + fc_old.input_layer(features, all_cols) + # Make sure that 2 variables get created in this case. + self.assertEqual(2, len( ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + expected_var_names = [ + 'input_layer/sparse_feature_embedding/embedding_weights:0', + 'input_layer_1/sparse_feature_embedding/embedding_weights:0' + ] self.assertItemsEqual( - ['shared_feature_layer/aaa_bbb_shared_embedding:0'], + expected_var_names, [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - def test_with_numpy_input_fn(self): - embedding_values = ( - (1., 2., 3., 4., 5.), # id 0 - (6., 7., 8., 9., 10.), # id 1 - (11., 12., 13., 14., 15.) # id 2 - ) - def _initializer(shape, dtype, partition_info): - del shape, dtype, partition_info - return embedding_values - - # price has 1 dimension in feature_layer - price = fc.numeric_column('price') - body_style = fc.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - # one_hot_body_style has 3 dims in feature_layer. - one_hot_body_style = fc.indicator_column(body_style) - # embedded_body_style has 5 dims in feature_layer. - embedded_body_style = fc.embedding_column( - body_style, dimension=5, initializer=_initializer) - - input_fn = numpy_io.numpy_input_fn( - x={ - 'price': np.array([11., 12., 13., 14.]), - 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), - }, - batch_size=2, - shuffle=False) - features = input_fn() - net = FeatureLayer([price, one_hot_body_style, embedded_body_style])( - features) - self.assertEqual(1 + 3 + 5, net.shape[1]) - with _initialized_session() as sess: - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(sess, coord=coord) - - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. 
- self.assertAllEqual( - [[11., 12., 13., 14., 15., 0., 0., 1., 11.], - [1., 2., 3., 4., 5., 1., 0., 0., 12]], - sess.run(net)) - - coord.request_stop() - coord.join(threads) - def test_with_1d_sparse_tensor(self): embedding_values = ( (1., 2., 3., 4., 5.), # id 0 (6., 7., 8., 9., 10.), # id 1 (11., 12., 13., 14., 15.) # id 2 ) + def _initializer(shape, dtype, partition_info): del shape, dtype, partition_info return embedding_values - # price has 1 dimension in feature_layer + # price has 1 dimension in input_layer price = fc.numeric_column('price') - # one_hot_body_style has 3 dims in feature_layer. + # one_hot_body_style has 3 dims in input_layer. body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) one_hot_body_style = fc.indicator_column(body_style) - # embedded_body_style has 5 dims in feature_layer. + # embedded_body_style has 5 dims in input_layer. country = fc.categorical_column_with_vocabulary_list( 'country', vocabulary_list=['US', 'JP', 'CA']) embedded_country = fc.embedding_column( @@ -2231,28 +3948,34 @@ class FeatureLayerTest(test.TestCase): # Provides 1-dim tensor and dense tensor. features = { - 'price': constant_op.constant([11., 12.,]), - 'body-style': sparse_tensor.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), + 'price': + constant_op.constant([ + 11., + 12., + ]), + 'body-style': + sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), # This is dense tensor for the categorical_column. 
- 'country': constant_op.constant(['CA', 'US']), + 'country': + constant_op.constant(['CA', 'US']), } self.assertEqual(1, features['price'].shape.ndims) self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) self.assertEqual(1, features['country'].shape.ndims) - net = FeatureLayer([price, one_hot_body_style, embedded_country])(features) + net = fc_old.input_layer(features, + [price, one_hot_body_style, embedded_country]) self.assertEqual(1 + 3 + 5, net.shape[1]) with _initialized_session() as sess: # Each row is formed by concatenating `embedded_body_style`, # `one_hot_body_style`, and `price` in order. - self.assertAllEqual( - [[0., 0., 1., 11., 12., 13., 14., 15., 11.], - [1., 0., 0., 1., 2., 3., 4., 5., 12.]], - sess.run(net)) + self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.], + [1., 0., 0., 1., 2., 3., 4., 5., 12.]], + sess.run(net)) def test_with_1d_unknown_shape_sparse_tensor(self): embedding_values = ( @@ -2260,19 +3983,20 @@ class FeatureLayerTest(test.TestCase): (6., 7.), # id 1 (11., 12.) # id 2 ) + def _initializer(shape, dtype, partition_info): del shape, dtype, partition_info return embedding_values - # price has 1 dimension in feature_layer + # price has 1 dimension in input_layer price = fc.numeric_column('price') - # one_hot_body_style has 3 dims in feature_layer. + # one_hot_body_style has 3 dims in input_layer. body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) one_hot_body_style = fc.indicator_column(body_style) - # embedded_body_style has 5 dims in feature_layer. + # embedded_body_style has 5 dims in input_layer. 
country = fc.categorical_column_with_vocabulary_list( 'country', vocabulary_list=['US', 'JP', 'CA']) embedded_country = fc.embedding_column( @@ -2291,12 +4015,11 @@ class FeatureLayerTest(test.TestCase): price_data = np.array([11., 12.]) body_style_data = sparse_tensor.SparseTensorValue( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)) + indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) country_data = np.array([['US'], ['CA']]) - net = FeatureLayer([price, one_hot_body_style, embedded_country])(features) + net = fc_old.input_layer(features, + [price, one_hot_body_style, embedded_country]) self.assertEqual(1 + 3 + 2, net.shape[1]) with _initialized_session() as sess: @@ -2313,7 +4036,7 @@ class FeatureLayerTest(test.TestCase): })) def test_with_rank_0_feature(self): - # price has 1 dimension in feature_layer + # price has 1 dimension in input_layer price = fc.numeric_column('price') features = { 'price': constant_op.constant(0), @@ -2322,13 +4045,13 @@ class FeatureLayerTest(test.TestCase): # Static rank 0 should fail with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): - FeatureLayer([price])(features) + fc_old.input_layer(features, [price]) # Dynamic rank 0 should fail features = { 'price': array_ops.placeholder(dtypes.float32), } - net = FeatureLayer([price])(features) + net = fc_old.input_layer(features, [price]) self.assertEqual(1, net.shape[1]) with _initialized_session() as sess: with self.assertRaisesOpError('Feature .* cannot have rank 0'): @@ -2337,10 +4060,14 @@ class FeatureLayerTest(test.TestCase): class MakeParseExampleSpecTest(test.TestCase): - class _TestFeatureColumn(FeatureColumn, + class _TestFeatureColumn(fc.FeatureColumn, collections.namedtuple('_TestFeatureColumn', ('parse_spec'))): + @property + def _is_v2_column(self): + return True + @property def name(self): return '_TestFeatureColumn' @@ -2458,6 +4185,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): 
self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.string) }, column.parse_example_spec) + self.assertTrue(column._is_v2_column) def test_key_should_be_string(self): with self.assertRaisesRegexp(ValueError, 'key must be a string.'): @@ -2501,7 +4229,10 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - column.get_sparse_tensors(FeatureTransformationCache({'aaa': inputs}), None) + column.get_sparse_tensors( + fc.FeatureTransformationCache({ + 'aaa': inputs + }), None) with self.assertRaisesRegexp(errors.OpError, 'file_does_not_exist'): with self.cached_session(): lookup_ops.tables_initializer().run() @@ -2525,7 +4256,10 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - column.get_sparse_tensors(FeatureTransformationCache({'aaa': inputs}), None) + column.get_sparse_tensors( + fc.FeatureTransformationCache({ + 'aaa': inputs + }), None) with self.assertRaisesRegexp(errors.OpError, 'Invalid vocab_size'): with self.cached_session(): lookup_ops.tables_initializer().run() @@ -2564,7 +4298,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=(2, 2)) with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'): column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) @@ -2580,7 +4314,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=(2, 2)) with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'): column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) @@ -2616,7 +4350,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + 
fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2637,7 +4371,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2659,7 +4393,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - id_tensor = _transform_features({'aaa': inputs}, [column], None)[column] + id_tensor = fc._transform_features({'aaa': inputs}, [column], None)[column] with _initialized_session(): _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue( @@ -2675,7 +4409,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): vocabulary_file=self._wire_vocabulary_file_name, vocabulary_size=self._wire_vocabulary_size) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': (('marlo', ''), ('skywalker', 'omar')) }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2699,7 +4433,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2723,7 +4457,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=('marlo', 'skywalker', 'omar', 'heisenberg'), dense_shape=(2, 3)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2749,7 +4483,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=('marlo', 'skywalker', 
'omar'), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2773,7 +4507,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=(11, 100, 30, 22), dense_shape=(3, 3)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2795,7 +4529,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dtype=dtypes.int32, default_value=default_value) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': ((11, -1, -1), (100, 30, -1), (-1, -1, 22)) }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2820,7 +4554,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=(11, 100, 30, 22), dense_shape=(3, 3)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -2859,6 +4593,32 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), predictions.eval()) + def test_old_linear_model(self): + wire_column = fc.categorical_column_with_vocabulary_file( + key='wire', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size, + num_oov_buckets=1) + self.assertEqual(4, wire_column.num_buckets) + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + wire_column.name: + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + }, (wire_column,)) + bias = get_linear_model_bias() + wire_var = get_linear_model_column_var(wire_column) + with 
_initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() + # 'marlo' -> 2: wire_var[2] = 3 + # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 + self.assertAllClose(((3.,), (5.,)), predictions.eval()) + class VocabularyListCategoricalColumnTest(test.TestCase): @@ -2871,6 +4631,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.string) }, column.parse_example_spec) + self.assertTrue(column._is_v2_column) def test_key_should_be_string(self): with self.assertRaisesRegexp(ValueError, 'key must be a string.'): @@ -2973,7 +4734,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dense_shape=(2, 2)) with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'): column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) @@ -2987,7 +4748,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dense_shape=(2, 2)) with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'): column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) @@ -3044,7 +4805,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3065,7 +4826,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - id_tensor = _transform_features({'aaa': inputs}, [column], None)[column] + id_tensor = fc._transform_features({'aaa': inputs}, [column], 
None)[column] with _initialized_session(): _assert_sparse_tensor_value( self, @@ -3080,7 +4841,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': (('marlo', ''), ('skywalker', 'omar')) }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3103,7 +4864,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3126,7 +4887,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): values=('marlo', 'skywalker', 'omar', 'heisenberg'), dense_shape=(2, 3)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3149,7 +4910,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): values=np.array((11, 100, 30, 22), dtype=np.int32), dense_shape=(3, 3)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3170,10 +4931,10 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dtype=dtypes.int32, default_value=default_value) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': - np.array( - ((11, -1, -1), (100, 30, -1), (-1, -1, 22)), dtype=np.int32) + np.array(((11, -1, -1), (100, 30, -1), (-1, -1, 22)), + dtype=np.int32) }), None) self.assertIsNone(id_weight_pair.weight_tensor) with _initialized_session(): @@ -3196,7 +4957,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): values=(11, 100, 30, 
22), dense_shape=(3, 3)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3234,6 +4995,31 @@ class VocabularyListCategoricalColumnTest(test.TestCase): # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), predictions.eval()) + def test_old_linear_model(self): + wire_column = fc.categorical_column_with_vocabulary_list( + key='aaa', + vocabulary_list=('omar', 'stringer', 'marlo'), + num_oov_buckets=1) + self.assertEqual(4, wire_column.num_buckets) + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + wire_column.name: + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + }, (wire_column,)) + bias = get_linear_model_bias() + wire_var = get_linear_model_column_var(wire_column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() + # 'marlo' -> 2: wire_var[2] = 3 + # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 + self.assertAllClose(((3.,), (5.,)), predictions.eval()) + class IdentityCategoricalColumnTest(test.TestCase): @@ -3245,6 +5031,7 @@ class IdentityCategoricalColumnTest(test.TestCase): self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, column.parse_example_spec) + self.assertTrue(column._is_v2_column) def test_key_should_be_string(self): with self.assertRaisesRegexp(ValueError, 'key must be a string.'): @@ -3285,7 +5072,7 @@ class IdentityCategoricalColumnTest(test.TestCase): dense_shape=(2, 2)) with self.assertRaisesRegexp(ValueError, 'Invalid input, not integer'): column.get_sparse_tensors( - FeatureTransformationCache({ + 
fc.FeatureTransformationCache({ 'aaa': inputs }), None) @@ -3317,7 +5104,7 @@ class IdentityCategoricalColumnTest(test.TestCase): values=(0, 1, 0), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3336,7 +5123,7 @@ class IdentityCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=(0, 1, 0), dense_shape=(2, 2)) - id_tensor = _transform_features({'aaa': inputs}, [column], None)[column] + id_tensor = fc._transform_features({'aaa': inputs}, [column], None)[column] with _initialized_session(): _assert_sparse_tensor_value( self, @@ -3349,7 +5136,7 @@ class IdentityCategoricalColumnTest(test.TestCase): def test_get_sparse_tensors_dense_input(self): column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': ((0, -1), (1, 0)) }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3369,7 +5156,7 @@ class IdentityCategoricalColumnTest(test.TestCase): values=(1, -1, 0), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3385,7 +5172,7 @@ class IdentityCategoricalColumnTest(test.TestCase): values=(1, 99, 0), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3402,7 +5189,7 @@ class IdentityCategoricalColumnTest(test.TestCase): values=(1, -1, 99), dense_shape=(2, 2)) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3426,7 +5213,7 
@@ class IdentityCategoricalColumnTest(test.TestCase): values=input_values, dense_shape=input_shape) id_weight_pair = column.get_sparse_tensors( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': inputs }), None) self.assertIsNone(id_weight_pair.weight_tensor) @@ -3465,6 +5252,28 @@ class IdentityCategoricalColumnTest(test.TestCase): # weight_var[2] + weight_var[1] = 3+2 = 5 self.assertAllClose(((1.,), (5.,)), predictions.eval()) + def test_old_linear_model(self): + column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) + self.assertEqual(3, column.num_buckets) + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + column.name: + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)) + }, (column,)) + bias = get_linear_model_bias() + weight_var = get_linear_model_column_var(column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + weight_var.assign(((1.,), (2.,), (3.,))).eval() + # weight_var[0] = 1 + # weight_var[2] + weight_var[1] = 3+2 = 5 + self.assertAllClose(((1.,), (5.,)), predictions.eval()) + class TransformFeaturesTest(test.TestCase): @@ -3483,8 +5292,8 @@ class TransformFeaturesTest(test.TestCase): indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) } - transformed = _transform_features(features, - [bucketized_price, hashed_sparse], None) + transformed = fc._transform_features( + features, [bucketized_price, hashed_sparse], None) with _initialized_session(): self.assertIn(bucketized_price.name, transformed[bucketized_price].name) self.assertAllEqual([[0], [3]], transformed[bucketized_price].eval()) @@ -3494,11 +5303,15 @@ class TransformFeaturesTest(test.TestCase): def test_column_order(self): """When the column is both dense and sparse, uses sparse tensors.""" - class _LoggerColumn(FeatureColumn): 
+ class _LoggerColumn(fc.FeatureColumn): def __init__(self, name): self._name = name + @property + def _is_v2_column(self): + return True + @property def name(self): return self._name @@ -3516,12 +5329,12 @@ class TransformFeaturesTest(test.TestCase): column1 = _LoggerColumn('1') column2 = _LoggerColumn('2') call_logger = {'count': 0} - _transform_features({}, [column1, column2], None) + fc._transform_features({}, [column1, column2], None) self.assertEqual(0, column1.call_order) self.assertEqual(1, column2.call_order) call_logger = {'count': 0} - _transform_features({}, [column2, column1], None) + fc._transform_features({}, [column2, column1], None) self.assertEqual(0, column1.call_order) self.assertEqual(1, column2.call_order) @@ -3534,17 +5347,19 @@ class IndicatorColumnTest(test.TestCase): self.assertEqual(indicator_a.categorical_column.name, 'a') self.assertEqual(indicator_a.name, 'a_indicator') self.assertEqual(indicator_a.variable_shape, [1, 4]) + self.assertTrue(indicator_a._is_v2_column) - b = fc.categorical_column_with_hash_bucket('b', hash_bucket_size=100) + b = fc_old.categorical_column_with_hash_bucket('b', hash_bucket_size=100) indicator_b = fc.indicator_column(b) self.assertEqual(indicator_b.categorical_column.name, 'b') self.assertEqual(indicator_b.name, 'b_indicator') self.assertEqual(indicator_b.variable_shape, [1, 100]) + self.assertFalse(indicator_b._is_v2_column) def test_1D_shape_succeeds(self): animal = fc.indicator_column( fc.categorical_column_with_hash_bucket('animal', 4)) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'animal': ['fox', 'fox'] }) output = transformation_cache.get(animal, None) @@ -3555,7 +5370,7 @@ class IndicatorColumnTest(test.TestCase): # TODO(ispir/cassandrax): Swith to categorical_column_with_keys when ready. 
animal = fc.indicator_column( fc.categorical_column_with_hash_bucket('animal', 4)) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'animal': sparse_tensor.SparseTensor( indices=[[0, 0], [1, 0]], @@ -3570,7 +5385,7 @@ class IndicatorColumnTest(test.TestCase): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'animal': sparse_tensor.SparseTensor( indices=[[0, 0], [0, 1]], values=[1, 1], dense_shape=[1, 2]) @@ -3582,7 +5397,7 @@ class IndicatorColumnTest(test.TestCase): def test_multi_hot2(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) - transformation_cache = FeatureTransformationCache({ + transformation_cache = fc.FeatureTransformationCache({ 'animal': sparse_tensor.SparseTensor( indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) @@ -3632,8 +5447,8 @@ class IndicatorColumnTest(test.TestCase): values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) } - indicator_tensor = _transform_features(features, [a_indicator], - None)[a_indicator] + indicator_tensor = fc._transform_features(features, [a_indicator], + None)[a_indicator] with _initialized_session(): self.assertAllEqual([[0, 0, 1], [1, 0, 0]], indicator_tensor.eval()) @@ -3647,8 +5462,8 @@ class IndicatorColumnTest(test.TestCase): 'ids': constant_op.constant([['c', 'b', 'a']]), 'weights': constant_op.constant([[2., 4., 6.]]) } - indicator_tensor = _transform_features(features, [indicator], - None)[indicator] + indicator_tensor = fc._transform_features(features, [indicator], + None)[indicator] with _initialized_session(): self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval()) @@ -3662,8 +5477,8 @@ class IndicatorColumnTest(test.TestCase): 'ids': constant_op.constant([['c', 'b', 'unknown']]), 'weights': constant_op.constant([[2., 4., 
6.]]) } - indicator_tensor = _transform_features(features, [indicator], - None)[indicator] + indicator_tensor = fc._transform_features(features, [indicator], + None)[indicator] with _initialized_session(): self.assertAllEqual([[0., 4., 2.]], indicator_tensor.eval()) @@ -3675,8 +5490,8 @@ class IndicatorColumnTest(test.TestCase): features = { 'ids': constant_op.constant([['c', 'b', 'unknown']]), } - indicator_tensor = _transform_features(features, [indicator], - None)[indicator] + indicator_tensor = fc._transform_features(features, [indicator], + None)[indicator] with _initialized_session(): self.assertAllEqual([[0., 1., 1.]], indicator_tensor.eval()) @@ -3700,6 +5515,44 @@ class IndicatorColumnTest(test.TestCase): weight_var.assign([[1.], [2.], [3.], [4.]]).eval() self.assertAllClose([[2. + 3.]], predictions.eval()) + def test_old_linear_model(self): + animal = fc.indicator_column( + fc.categorical_column_with_identity('animal', num_buckets=4)) + with ops.Graph().as_default(): + features = { + 'animal': + sparse_tensor.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) + } + + predictions = fc_old.linear_model(features, [animal]) + weight_var = get_linear_model_column_var(animal) + with _initialized_session(): + # All should be zero-initialized. + self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval()) + self.assertAllClose([[0.]], predictions.eval()) + weight_var.assign([[1.], [2.], [3.], [4.]]).eval() + self.assertAllClose([[2. 
+ 3.]], predictions.eval()) + + def test_old_linear_model_old_categorical(self): + animal = fc.indicator_column( + fc_old.categorical_column_with_identity('animal', num_buckets=4)) + with ops.Graph().as_default(): + features = { + 'animal': + sparse_tensor.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) + } + + predictions = fc_old.linear_model(features, [animal]) + weight_var = get_linear_model_column_var(animal) + with _initialized_session(): + # All should be zero-initialized. + self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval()) + self.assertAllClose([[0.]], predictions.eval()) + weight_var.assign([[1.], [2.], [3.], [4.]]).eval() + self.assertAllClose([[2. + 3.]], predictions.eval()) + def test_feature_layer(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) @@ -3709,12 +5562,38 @@ class IndicatorColumnTest(test.TestCase): sparse_tensor.SparseTensor( indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) } - net = FeatureLayer([animal])(features) + net = fc.FeatureLayer([animal])(features) + with _initialized_session(): + self.assertAllClose([[0., 1., 1., 0.]], net.eval()) + + def test_input_layer(self): + animal = fc.indicator_column( + fc.categorical_column_with_identity('animal', num_buckets=4)) + with ops.Graph().as_default(): + features = { + 'animal': + sparse_tensor.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) + } + net = fc_old.input_layer(features, [animal]) + with _initialized_session(): + self.assertAllClose([[0., 1., 1., 0.]], net.eval()) + + def test_input_layer_old_categorical(self): + animal = fc.indicator_column( + fc_old.categorical_column_with_identity('animal', num_buckets=4)) + with ops.Graph().as_default(): + features = { + 'animal': + sparse_tensor.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) + } + net = fc_old.input_layer(features, [animal]) with _initialized_session(): 
self.assertAllClose([[0., 1., 1., 0.]], net.eval()) -class _TestStateManager(StateManager): +class _TestStateManager(fc.StateManager): def __init__(self, trainable=True): # Dict of feature_column to a dict of variables. @@ -3771,6 +5650,15 @@ class EmbeddingColumnTest(test.TestCase): self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column.parse_example_spec) + self.assertTrue(embedding_column._is_v2_column) + + def test_is_v2_column(self): + categorical_column = fc_old.categorical_column_with_identity( + key='aaa', num_buckets=3) + embedding_dimension = 2 + embedding_column = fc.embedding_column( + categorical_column, dimension=embedding_dimension) + self.assertFalse(embedding_column._is_v2_column) def test_all_constructor_args(self): categorical_column = fc.categorical_column_with_identity( @@ -3860,7 +5748,7 @@ class EmbeddingColumnTest(test.TestCase): values=(0, 1, 0), dense_shape=(2, 2)) } - outputs = _transform_features(features, [a, a_embedded], None) + outputs = fc._transform_features(features, [a, a_embedded], None) output_a = outputs[a] output_embedded = outputs[a_embedded] with _initialized_session(): @@ -3905,19 +5793,79 @@ class EmbeddingColumnTest(test.TestCase): ) # Build columns. - categorical_column = fc.categorical_column_with_identity( + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, dimension=embedding_dimension, + initializer=_initializer) + state_manager = _TestStateManager() + embedding_column.create_state(state_manager) + + # Provide sparse input and get dense result. + embedding_lookup = embedding_column.get_dense_tensor( + fc.FeatureTransformationCache({ + 'aaa': sparse_input + }), state_manager) + + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) + with _initialized_session(): + self.assertAllEqual(embedding_values, global_vars[0].eval()) + self.assertAllEqual(expected_lookups, embedding_lookup.eval()) + + def test_get_dense_tensor_old_categorical(self): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) + + # Build columns. + categorical_column = fc_old.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column = fc.embedding_column( - categorical_column, dimension=embedding_dimension, + categorical_column, + dimension=embedding_dimension, initializer=_initializer) - state_manager = _TestStateManager() - embedding_column.create_state(state_manager) # Provide sparse input and get dense result. 
- embedding_lookup = embedding_column.get_dense_tensor( - FeatureTransformationCache({ + embedding_lookup = embedding_column._get_dense_tensor( + fc_old._LazyBuilder({ 'aaa': sparse_input - }), state_manager) + })) # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) @@ -3977,7 +5925,7 @@ class EmbeddingColumnTest(test.TestCase): # Provide sparse input and get dense result. embedding_lookup = embedding_column.get_dense_tensor( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': sparse_input }), state_manager) @@ -4040,7 +5988,7 @@ class EmbeddingColumnTest(test.TestCase): input_values = array_ops.placeholder(dtype=dtypes.int64) input_shape = array_ops.placeholder(dtype=dtypes.int64) embedding_lookup = embedding_column.get_dense_tensor( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': sparse_tensor.SparseTensorValue( indices=input_indices, @@ -4108,7 +6056,7 @@ class EmbeddingColumnTest(test.TestCase): # Provide sparse input and get dense result. embedding_lookup = embedding_column.get_dense_tensor( - FeatureTransformationCache({ + fc.FeatureTransformationCache({ 'aaa': sparse_input }), state_manager) @@ -4120,7 +6068,263 @@ class EmbeddingColumnTest(test.TestCase): self.assertAllEqual(embedding_values, global_vars[0].eval()) self.assertAllEqual(expected_lookups, embedding_lookup.eval()) - def test_linear_model(self): + def test_linear_model(self): + # Inputs. + batch_size = 4 + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(batch_size, 5)) + + # Embedding variable. 
+ embedding_dimension = 2 + embedding_shape = (vocabulary_size, embedding_dimension) + zeros_embedding_values = np.zeros(embedding_shape) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual(embedding_shape, shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return zeros_embedding_values + + # Build columns. + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer) + + with ops.Graph().as_default(): + model = fc.LinearModel((embedding_column,)) + predictions = model({categorical_column.name: sparse_input}) + expected_var_names = ( + 'linear_model/bias_weights:0', + 'linear_model/aaa_embedding/weights:0', + 'linear_model/aaa_embedding/embedding_weights:0', + ) + self.assertItemsEqual( + expected_var_names, + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + trainable_vars = { + v.name: v for v in ops.get_collection( + ops.GraphKeys.TRAINABLE_VARIABLES) + } + self.assertItemsEqual(expected_var_names, trainable_vars.keys()) + bias = trainable_vars['linear_model/bias_weights:0'] + embedding_weights = trainable_vars[ + 'linear_model/aaa_embedding/embedding_weights:0'] + linear_weights = trainable_vars[ + 'linear_model/aaa_embedding/weights:0'] + with _initialized_session(): + # Predictions with all zero weights. + self.assertAllClose(np.zeros((1,)), bias.eval()) + self.assertAllClose(zeros_embedding_values, embedding_weights.eval()) + self.assertAllClose( + np.zeros((embedding_dimension, 1)), linear_weights.eval()) + self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval()) + + # Predictions with all non-zero weights. + embedding_weights.assign(( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) 
# id 2 + )).eval() + linear_weights.assign(((4.,), (6.,))).eval() + # example 0, ids [2], embedding[0] = [7, 11] + # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] + # example 2, ids [], embedding[2] = [0, 0] + # example 3, ids [1], embedding[3] = [3, 5] + # sum(embeddings * linear_weights) + # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] + self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval()) + + def test_feature_layer(self): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) + + # Build columns. + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer) + + # Provide sparse input and get dense result. + l = fc.FeatureLayer((embedding_column,)) + feature_layer = l({'aaa': sparse_input}) + + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in global_vars])) + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in trainable_vars])) + with _initialized_session(): + self.assertAllEqual(embedding_values, trainable_vars[0].eval()) + self.assertAllEqual(expected_lookups, feature_layer.eval()) + + def test_feature_layer_not_trainable(self): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) + + # Build columns. + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer, + trainable=False) + + # Provide sparse input and get dense result. + feature_layer = fc.FeatureLayer((embedding_column,))({'aaa': sparse_input}) + + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in global_vars])) + self.assertItemsEqual( + [], ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) + with _initialized_session(): + self.assertAllEqual(embedding_values, global_vars[0].eval()) + self.assertAllEqual(expected_lookups, feature_layer.eval()) + + def test_input_layer(self): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) + + # Build columns. + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer) + + # Provide sparse input and get dense result. + feature_layer = fc_old.input_layer({ + 'aaa': sparse_input + }, (embedding_column,)) + + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual(('input_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in global_vars])) + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertItemsEqual(('input_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in trainable_vars])) + with _initialized_session(): + self.assertAllEqual(embedding_values, trainable_vars[0].eval()) + self.assertAllEqual(expected_lookups, feature_layer.eval()) + + def test_old_linear_model(self): # Inputs. batch_size = 4 vocabulary_size = 3 @@ -4137,6 +6341,7 @@ class EmbeddingColumnTest(test.TestCase): embedding_dimension = 2 embedding_shape = (vocabulary_size, embedding_dimension) zeros_embedding_values = np.zeros(embedding_shape) + def _initializer(shape, dtype, partition_info): self.assertAllEqual(embedding_shape, shape) self.assertEqual(dtypes.float32, dtype) @@ -4152,8 +6357,9 @@ class EmbeddingColumnTest(test.TestCase): initializer=_initializer) with ops.Graph().as_default(): - model = fc.LinearModel((embedding_column,)) - predictions = model({categorical_column.name: sparse_input}) + predictions = fc_old.linear_model({ + categorical_column.name: sparse_input + }, (embedding_column,)) expected_var_names = ( 'linear_model/bias_weights:0', 'linear_model/aaa_embedding/weights:0', @@ -4163,15 +6369,14 @@ class EmbeddingColumnTest(test.TestCase): expected_var_names, [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) trainable_vars = { - v.name: v for v in ops.get_collection( - ops.GraphKeys.TRAINABLE_VARIABLES) + v.name: v + for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) } self.assertItemsEqual(expected_var_names, trainable_vars.keys()) bias = trainable_vars['linear_model/bias_weights:0'] embedding_weights = trainable_vars[ 'linear_model/aaa_embedding/embedding_weights:0'] - linear_weights = trainable_vars[ - 'linear_model/aaa_embedding/weights:0'] + 
linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0'] with _initialized_session(): # Predictions with all zero weights. self.assertAllClose(np.zeros((1,)), bias.eval()) @@ -4195,8 +6400,9 @@ class EmbeddingColumnTest(test.TestCase): # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval()) - def test_feature_layer(self): + def test_old_linear_model_old_categorical(self): # Inputs. + batch_size = 4 vocabulary_size = 3 sparse_input = sparse_tensor.SparseTensorValue( # example 0, ids [2] @@ -4205,114 +6411,70 @@ class EmbeddingColumnTest(test.TestCase): # example 3, ids [1] indices=((0, 0), (1, 0), (1, 4), (3, 0)), values=(2, 0, 1, 1), - dense_shape=(4, 5)) + dense_shape=(batch_size, 5)) # Embedding variable. embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) - - # Build columns. - categorical_column = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer) - - # Provide sparse input and get dense result. - l = FeatureLayer((embedding_column,)) - feature_layer = l({'aaa': sparse_input}) - - # Assert expected embedding variable and lookups. 
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in trainable_vars])) - with _initialized_session(): - self.assertAllEqual(embedding_values, trainable_vars[0].eval()) - self.assertAllEqual(expected_lookups, feature_layer.eval()) - - def test_feature_layer_not_trainable(self): - # Inputs. - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) + embedding_shape = (vocabulary_size, embedding_dimension) + zeros_embedding_values = np.zeros(embedding_shape) - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertAllEqual(embedding_shape, shape) self.assertEqual(dtypes.float32, dtype) self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) + return zeros_embedding_values # Build columns. 
- categorical_column = fc.categorical_column_with_identity( + categorical_column = fc_old.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column = fc.embedding_column( categorical_column, dimension=embedding_dimension, - initializer=_initializer, - trainable=False) + initializer=_initializer) - # Provide sparse input and get dense result. - feature_layer = FeatureLayer((embedding_column,))({'aaa': sparse_input}) + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + categorical_column.name: sparse_input + }, (embedding_column,)) + expected_var_names = ( + 'linear_model/bias_weights:0', + 'linear_model/aaa_embedding/weights:0', + 'linear_model/aaa_embedding/embedding_weights:0', + ) + self.assertItemsEqual( + expected_var_names, + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + trainable_vars = { + v.name: v + for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + } + self.assertItemsEqual(expected_var_names, trainable_vars.keys()) + bias = trainable_vars['linear_model/bias_weights:0'] + embedding_weights = trainable_vars[ + 'linear_model/aaa_embedding/embedding_weights:0'] + linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0'] + with _initialized_session(): + # Predictions with all zero weights. + self.assertAllClose(np.zeros((1,)), bias.eval()) + self.assertAllClose(zeros_embedding_values, embedding_weights.eval()) + self.assertAllClose( + np.zeros((embedding_dimension, 1)), linear_weights.eval()) + self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval()) - # Assert expected embedding variable and lookups. 
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - self.assertItemsEqual( - [], ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) - with _initialized_session(): - self.assertAllEqual(embedding_values, global_vars[0].eval()) - self.assertAllEqual(expected_lookups, feature_layer.eval()) + # Predictions with all non-zero weights. + embedding_weights.assign(( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + )).eval() + linear_weights.assign(((4.,), (6.,))).eval() + # example 0, ids [2], embedding[0] = [7, 11] + # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] + # example 2, ids [], embedding[2] = [0, 0] + # example 3, ids [1], embedding[3] = [3, 5] + # sum(embeddings * linear_weights) + # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] + self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval()) class SharedEmbeddingColumnTest(test.TestCase): @@ -4530,8 +6692,8 @@ class SharedEmbeddingColumnTest(test.TestCase): values=(1, 2, 1), dense_shape=(2, 2)), } - outputs = _transform_features(features, [a, a_embedded, b, b_embedded], - None) + outputs = fc._transform_features(features, [a, a_embedded, b, b_embedded], + None) output_a = outputs[a] output_a_embedded = outputs[a_embedded] output_b = outputs[b] @@ -4599,9 +6761,9 @@ class SharedEmbeddingColumnTest(test.TestCase): # Provide sparse input and get dense result. embedding_lookup_a = embedding_column_a.get_dense_tensor( - FeatureTransformationCache(input_features), state_manager) + fc.FeatureTransformationCache(input_features), state_manager) embedding_lookup_b = embedding_column_b.get_dense_tensor( - FeatureTransformationCache(input_features), state_manager) + fc.FeatureTransformationCache(input_features), state_manager) # Assert expected embedding variable and lookups. 
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) @@ -4665,9 +6827,9 @@ class SharedEmbeddingColumnTest(test.TestCase): # Provide sparse input and get dense result. embedding_lookup_a = embedding_column_a.get_dense_tensor( - FeatureTransformationCache(input_features), state_manager) + fc.FeatureTransformationCache(input_features), state_manager) embedding_lookup_b = embedding_column_b.get_dense_tensor( - FeatureTransformationCache(input_features), state_manager) + fc.FeatureTransformationCache(input_features), state_manager) with _initialized_session() as sess: sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict) @@ -4852,7 +7014,7 @@ class SharedEmbeddingColumnTest(test.TestCase): } # Provide sparse input and get dense result. - feature_layer = FeatureLayer( + feature_layer = fc.FeatureLayer( feature_columns=(embedding_column_b, embedding_column_a, embedding_column_c, embedding_column_d), shared_state_manager=shared_state_manager)( @@ -4946,6 +7108,14 @@ class WeightedCategoricalColumnTest(test.TestCase): 'ids': parsing_ops.VarLenFeature(dtypes.int64), 'values': parsing_ops.VarLenFeature(dtypes.float32) }, column.parse_example_spec) + self.assertTrue(column._is_v2_column) + + def test_is_v2_column(self): + column = fc.weighted_categorical_column( + categorical_column=fc_old.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + self.assertFalse(column._is_v2_column) def test_deep_copy(self): """Tests deepcopy of categorical_column_with_hash_bucket.""" @@ -4987,7 +7157,10 @@ class WeightedCategoricalColumnTest(test.TestCase): values=('omar', 'stringer', 'marlo'), dense_shape=(2, 2)) with self.assertRaisesRegexp(ValueError, 'Bad dtype'): - _transform_features({'ids': strings, 'values': strings}, (column,), None) + fc._transform_features({ + 'ids': strings, + 'values': strings + }, (column,), None) def test_column_name_collision(self): with self.assertRaisesRegexp(ValueError, r'Parse 
config.*already exists'): @@ -5007,7 +7180,7 @@ class WeightedCategoricalColumnTest(test.TestCase): dense_shape=(2, 2)) with self.assertRaisesRegexp( ValueError, 'values is not in features dictionary'): - _transform_features({'ids': inputs}, (column,), None) + fc._transform_features({'ids': inputs}, (column,), None) def test_parse_example(self): a = fc.categorical_column_with_vocabulary_list( @@ -5056,7 +7229,7 @@ class WeightedCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=(0.5, 1.0, 0.1), dense_shape=(2, 2)) - id_tensor, weight_tensor = _transform_features({ + id_tensor, weight_tensor = fc._transform_features({ 'ids': inputs, 'values': weights, }, (column,), None)[column] @@ -5085,7 +7258,7 @@ class WeightedCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=(0.5, 1.0, 0.1), dense_shape=(2, 2)) - id_tensor, weight_tensor = _transform_features({ + id_tensor, weight_tensor = fc._transform_features({ 'ids': ((0, -1), (1, 0)), 'values': weights, }, (column,), None)[column] @@ -5114,7 +7287,7 @@ class WeightedCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=(2, 1, 0), dense_shape=(2, 2)) - id_tensor, weight_tensor = _transform_features({ + id_tensor, weight_tensor = fc._transform_features({ 'ids': inputs, 'values': ((.5, 0.), (1., .1)), }, (column,), None)[column] @@ -5236,6 +7409,137 @@ class WeightedCategoricalColumnTest(test.TestCase): # = 3*1 + 2*.1 = 3+.2 = 3.2 self.assertAllClose(((.5,), (3.2,)), predictions.eval()) + def test_old_linear_model(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 
1)), + values=(.5, 1., .1), + dense_shape=(2, 2)) + }, (column,)) + bias = get_linear_model_bias() + weight_var = get_linear_model_column_var(column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + weight_var.assign(((1.,), (2.,), (3.,))).eval() + # weight_var[0] * weights[0, 0] = 1 * .5 = .5 + # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] + # = 3*1 + 2*.1 = 3+.2 = 3.2 + self.assertAllClose(((.5,), (3.2,)), predictions.eval()) + + def test_old_linear_model_mismatched_shape(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + with self.assertRaisesRegexp(ValueError, + r'Dimensions.*are not compatible'): + fc_old.linear_model({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (0, 1), (1, 0), (1, 1)), + values=(.5, 11., 1., .1), + dense_shape=(2, 2)) + }, (column,)) + + def test_old_linear_model_mismatched_dense_values(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': ((.5,), (1.,)) + }, (column,), + sparse_combiner='mean') + # Disabling the constant folding optimizer here since it changes the + # error message differently on CPU and GPU. 
+ config = config_pb2.ConfigProto() + config.graph_options.rewrite_options.constant_folding = ( + rewriter_config_pb2.RewriterConfig.OFF) + with _initialized_session(config): + with self.assertRaisesRegexp(errors.OpError, 'Incompatible shapes'): + predictions.eval() + + def test_old_linear_model_mismatched_dense_shape(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': ((.5,), (1.,), (.1,)) + }, (column,)) + bias = get_linear_model_bias() + weight_var = get_linear_model_column_var(column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + weight_var.assign(((1.,), (2.,), (3.,))).eval() + # weight_var[0] * weights[0, 0] = 1 * .5 = .5 + # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] + # = 3*1 + 2*.1 = 3+.2 = 3.2 + self.assertAllClose(((.5,), (3.2,)), predictions.eval()) + + def test_old_linear_model_old_categorical(self): + column = fc.weighted_categorical_column( + categorical_column=fc_old.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + predictions = fc_old.linear_model({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(.5, 1., .1), + dense_shape=(2, 2)) + }, (column,)) + bias = get_linear_model_bias() + weight_var = get_linear_model_column_var(column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + 
self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + weight_var.assign(((1.,), (2.,), (3.,))).eval() + # weight_var[0] * weights[0, 0] = 1 * .5 = .5 + # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] + # = 3*1 + 2*.1 = 3+.2 = 3.2 + self.assertAllClose(((.5,), (3.2,)), predictions.eval()) + # TODO(ptucker): Add test with embedding of weighted categorical. if __name__ == '__main__': -- GitLab From 45fb1429f86b5ee6589fd50d8325843b49f78409 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 9 Oct 2018 08:48:02 -0700 Subject: [PATCH 114/411] Avoid extra calls to set_random_seed, as it is already called in tensorflowtestcase. PiperOrigin-RevId: 216363450 --- .../python/kernel_tests/linalg/linear_operator_addition_test.py | 2 -- .../kernel_tests/linalg/linear_operator_block_diag_test.py | 2 -- .../kernel_tests/linalg/linear_operator_composition_test.py | 2 -- .../python/kernel_tests/linalg/linear_operator_diag_test.py | 2 -- .../kernel_tests/linalg/linear_operator_full_matrix_test.py | 2 -- .../python/kernel_tests/linalg/linear_operator_identity_test.py | 2 -- .../kernel_tests/linalg/linear_operator_kronecker_test.py | 2 -- .../kernel_tests/linalg/linear_operator_low_rank_update_test.py | 2 -- .../linalg/linear_operator_lower_triangular_test.py | 2 -- .../python/kernel_tests/linalg/linear_operator_util_test.py | 2 -- .../python/kernel_tests/linalg/linear_operator_zeros_test.py | 2 -- 11 files changed, 22 deletions(-) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py index cf56168d63..628ed998c5 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py @@ -19,14 +19,12 @@ from __future__ import print_function import numpy as np -from tensorflow.python.framework import random_seed 
from tensorflow.python.ops import linalg_ops from tensorflow.python.ops.linalg import linalg as linalg_lib from tensorflow.python.ops.linalg import linear_operator_addition from tensorflow.python.platform import test linalg = linalg_lib -random_seed.set_random_seed(23) rng = np.random.RandomState(0) add_operators = linear_operator_addition.add_operators diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py index 3ede2aceaa..30951b1b0e 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import dtypes -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops.linalg import linalg as linalg_lib from tensorflow.python.ops.linalg import linear_operator_block_diag as block_diag @@ -29,7 +28,6 @@ from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.platform import test linalg = linalg_lib -random_seed.set_random_seed(23) rng = np.random.RandomState(0) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py index 99497914f2..02f56db596 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py @@ -21,7 +21,6 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.linalg import linalg as linalg_lib @@ -29,7 +28,6 @@ from 
tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib -random_seed.set_random_seed(23) rng = np.random.RandomState(0) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py index 52861ae84a..0758349531 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py @@ -17,7 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops @@ -27,7 +26,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib -random_seed.set_random_seed(23) class LinearOperatorDiagTest( diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py index 8373b5263f..8c2d2cf077 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import dtypes -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.linalg import linalg as linalg_lib @@ -28,7 +27,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib -random_seed.set_random_seed(23) class SquareLinearOperatorFullMatrixTest( diff --git 
a/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py index 0c3c6b390f..465a8194dd 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import dtypes -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import random_ops @@ -29,7 +28,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test -random_seed.set_random_seed(23) rng = np.random.RandomState(2016) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py index 7e81c9c6c4..f039b60f64 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py @@ -21,7 +21,6 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops.linalg import linalg as linalg_lib from tensorflow.python.ops.linalg import linear_operator_kronecker as kronecker @@ -30,7 +29,6 @@ from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.platform import test linalg = linalg_lib -random_seed.set_random_seed(23) rng = np.random.RandomState(0) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py index 61268607a4..207e5edf81 100644 --- 
a/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import dtypes -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.linalg import linalg as linalg_lib @@ -28,7 +27,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib -random_seed.set_random_seed(23) rng = np.random.RandomState(0) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py index eb4bff915b..e3c8f5cb68 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py @@ -17,14 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops.linalg import linalg as linalg_lib from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib -random_seed.set_random_seed(23) class LinearOperatorLowerTriangularTest( diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py index 86847d38c2..13218787e2 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py @@ -21,14 +21,12 @@ import numpy as np from tensorflow.python.framework import dtypes from 
tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.platform import test -random_seed.set_random_seed(23) rng = np.random.RandomState(0) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py index f0556304ad..ad97d1a93e 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py @@ -20,14 +20,12 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import dtypes -from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops.linalg import linalg as linalg_lib from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test -random_seed.set_random_seed(23) rng = np.random.RandomState(2016) -- GitLab From 32b9901c0e20f82831a5cf0a42b016e7ff5197d0 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Tue, 9 Oct 2018 09:17:04 -0700 Subject: [PATCH 115/411] Internal change PiperOrigin-RevId: 216367867 --- tensorflow/contrib/lite/build_def.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index b9e933a8b6..b3607a761c 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -313,8 +313,8 @@ def generated_test_models_failing(conversion_mode): "arg_min_max", "div", "floor_div", - "gather ", - "lstm ", + "gather", + "lstm", "resize_bilinear", "space_to_batch_nd", "split", -- GitLab From df11cce2e600581087f29ef0b85286f7e582572d Mon Sep 17 00:00:00 2001 From: 
Tamara Norman Date: Tue, 9 Oct 2018 09:18:53 -0700 Subject: [PATCH 116/411] Throw error when evaluating have variable target in GradientTape. PiperOrigin-RevId: 216368178 --- tensorflow/python/eager/backprop.py | 9 ++++++++- tensorflow/python/eager/backprop_test.py | 12 +++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index deac29111f..44ce69ee60 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -868,6 +868,7 @@ class GradientTape(object): Raises: RuntimeError: if called inside the context of the tape, or if called more than once on a non-persistent tape. + ValueError: if called on variable target. """ if self._tape is None: raise RuntimeError("GradientTape.gradient can only be called once on " @@ -887,6 +888,12 @@ class GradientTape(object): "gradient in order to compute higher order " "derrivatives.", 1) + flat_targets = nest.flatten(target) + for t in flat_targets: + if resource_variable_ops.is_resource_variable(t): + raise ValueError("GradientTape.gradient is not supported for variable " + "targets.") + flat_sources = nest.flatten(sources) flat_sources = [_handle_or_self(x) for x in flat_sources] @@ -896,7 +903,7 @@ class GradientTape(object): flat_grad = imperative_grad.imperative_grad( self._tape, - nest.flatten(target), + flat_targets, flat_sources, output_gradients=output_gradients) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 32731747b7..7e5c9f3cb6 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -548,6 +548,17 @@ class BackpropTest(test.TestCase): grad = g.gradient(y, [x])[0] self.assertEqual(self.evaluate(grad), 6.0) + @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes + def testGadientTapeCalledOnConstantTarget(self): + with backprop.GradientTape() as g: + x = 
variables.Variable([3.0]) + y = variables.Variable([2.0]) + with self.assertRaisesRegexp( + ValueError, + 'GradientTape.gradient is not supported for variable targets.'): + g.gradient(x, y) + @test_util.run_in_graph_and_eager_modes def testGradientTapeWithCond(self): x = constant_op.constant(3.0) @@ -982,7 +993,6 @@ class BackpropTest(test.TestCase): self.assertIsNone(dy) self.assertEqual(self.evaluate(dz), 3.0) - @test_util.run_in_graph_and_eager_modes def testDifferentiatingScalarCache(self): # In the following test, if x2 = x1 (i.e the objects are the exact same), -- GitLab From 92d533d19c44ab838a1f7954350fdafd62cfa889 Mon Sep 17 00:00:00 2001 From: Peter Ma Date: Tue, 9 Oct 2018 09:24:57 -0700 Subject: [PATCH 117/411] Change LOG(WARNING) to VLOG(1) in utils PiperOrigin-RevId: 216369081 --- tensorflow/core/grappler/costs/utils.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index 2fcadf1de3..87b74e2952 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -209,7 +209,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) { // Can't infer the size if the rank is unknown. It has to be at least a // scalar though. if (shape.unknown_rank()) { - LOG(WARNING) << "CalculateTensorSize() -- unknown rank"; + VLOG(1) << "CalculateTensorSize() -- unknown rank"; return size; } @@ -217,7 +217,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) { for (int i = 0; i < shape.dim_size(); ++i) { if (shape.dim(i).size() < 0) { shape.mutable_dim(i)->set_size(1); - LOG(WARNING) << "CalculateTensorSize() -- unknown dim: " << i; + VLOG(1) << "CalculateTensorSize() -- unknown dim: " << i; } } -- GitLab From 87d8055c74a65ec9fb2a13f38e6e2c5d30b7e2e4 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 9 Oct 2018 09:30:32 -0700 Subject: [PATCH 118/411] Correctly pre-reserve visit state in HloInstruction::PostOrderDFS Previously we pre-reserved the visit state based on the number of instructions but then started to index it with the instruction unique ID, which can be larger than the instruction count. This resulted in some very expensive re-allocations, which can be eliminated by reserving the correctly sized buffer. PiperOrigin-RevId: 216369849 --- tensorflow/compiler/xla/service/hlo_instruction.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 5c3908a9a4..050d28b289 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2474,7 +2474,7 @@ template static Status PostOrderDFS(HloInstruction* root, Visitor* visitor, const InternalCompareFunction* operand_order, bool ignore_control_predecessors) { - visitor->ReserveVisitStates(root->GetModule()->instruction_count()); + visitor->ReserveVisitStates(root->GetModule()->NumUniqueInstructionIds()); // dfs_stack holds pairs of unique_id(), HloInstruction*>. // -- GitLab From 3e1a0792fb593953860162d57320c8602fd199eb Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Tue, 9 Oct 2018 09:32:50 -0700 Subject: [PATCH 119/411] Create SDCAOptimizerV2 op to fix the "adaptative" typo. 
PiperOrigin-RevId: 216370193 --- .../linear_optimizer/python/ops/sdca_ops.py | 57 ++++-- .../base_api/api_def_SdcaOptimizerV2.pbtxt | 171 ++++++++++++++++++ tensorflow/core/kernels/sdca_ops.cc | 8 +- tensorflow/core/ops/sdca_ops.cc | 28 +++ tensorflow/python/ops/sdca_ops.py | 1 + 5 files changed, 246 insertions(+), 19 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index b98adf862b..48ac429701 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -22,6 +22,7 @@ import collections from six.moves import range from tensorflow.contrib.linear_optimizer.python.ops.sharded_mutable_dense_hashtable import ShardedMutableDenseHashTable +from tensorflow.python.compat import compat from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -485,24 +486,44 @@ class SdcaModel(object): sparse_weights.append(batch_gathered_weights) # pylint: disable=protected-access - esu, sfw, dfw = gen_sdca_ops.sdca_optimizer( - sparse_example_indices, - sparse_feature_indices, - sparse_features_values, - self._convert_n_to_tensor(self._examples['dense_features']), - internal_convert_to_tensor(self._examples['example_weights']), - internal_convert_to_tensor(self._examples['example_labels']), - sparse_indices, - sparse_weights, - self._convert_n_to_tensor(self._slots[ - 'unshrinked_dense_features_weights']), - example_state_data, - loss_type=self._options['loss_type'], - l1=self._options['symmetric_l1_regularization'], - l2=self._symmetric_l2_regularization(), - num_loss_partitions=self._num_loss_partitions(), - num_inner_iterations=1, - adaptative=self._adaptive()) + if compat.forward_compatible(year=2018, month=10, day=30): + esu, sfw, dfw = 
gen_sdca_ops.sdca_optimizer_v2( + sparse_example_indices, + sparse_feature_indices, + sparse_features_values, + self._convert_n_to_tensor(self._examples['dense_features']), + internal_convert_to_tensor(self._examples['example_weights']), + internal_convert_to_tensor(self._examples['example_labels']), + sparse_indices, + sparse_weights, + self._convert_n_to_tensor(self._slots[ + 'unshrinked_dense_features_weights']), + example_state_data, + loss_type=self._options['loss_type'], + l1=self._options['symmetric_l1_regularization'], + l2=self._symmetric_l2_regularization(), + num_loss_partitions=self._num_loss_partitions(), + num_inner_iterations=1, + adaptive=self._adaptive()) + else: + esu, sfw, dfw = gen_sdca_ops.sdca_optimizer( + sparse_example_indices, + sparse_feature_indices, + sparse_features_values, + self._convert_n_to_tensor(self._examples['dense_features']), + internal_convert_to_tensor(self._examples['example_weights']), + internal_convert_to_tensor(self._examples['example_labels']), + sparse_indices, + sparse_weights, + self._convert_n_to_tensor(self._slots[ + 'unshrinked_dense_features_weights']), + example_state_data, + loss_type=self._options['loss_type'], + l1=self._options['symmetric_l1_regularization'], + l2=self._symmetric_l2_regularization(), + num_loss_partitions=self._num_loss_partitions(), + num_inner_iterations=1, + adaptative=self._adaptive()) # pylint: enable=protected-access with ops.control_dependencies([esu]): diff --git a/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt new file mode 100644 index 0000000000..c615dee8c7 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt @@ -0,0 +1,171 @@ +op { + graph_op_name: "SdcaOptimizerV2" + visibility: HIDDEN + in_arg { + name: "sparse_example_indices" + description: < +Shai Shalev-Shwartz, Tong Zhang. 
2012 + +$$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ + +[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
+Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, +Peter Richtarik, Martin Takac. 2015 + +[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
+Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 +END +} diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc index 3bd4168dc7..d0e0b15da7 100644 --- a/tensorflow/core/kernels/sdca_ops.cc +++ b/tensorflow/core/kernels/sdca_ops.cc @@ -83,7 +83,11 @@ struct ComputeOptions { context, false, errors::InvalidArgument("Unsupported loss type: ", loss_type)); } - OP_REQUIRES_OK(context, context->GetAttr("adaptative", &adaptive)); + auto s = context->GetAttr("adaptative", &adaptive); + if (!s.ok()) { + s = context->GetAttr("adaptive", &adaptive); + } + OP_REQUIRES_OK(context, s); OP_REQUIRES_OK( context, context->GetAttr("num_sparse_features", &num_sparse_features)); OP_REQUIRES_OK(context, context->GetAttr("num_sparse_features_with_values", @@ -245,6 +249,8 @@ class SdcaOptimizer : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name("SdcaOptimizer").Device(DEVICE_CPU), SdcaOptimizer); +REGISTER_KERNEL_BUILDER(Name("SdcaOptimizerV2").Device(DEVICE_CPU), + SdcaOptimizer); class SdcaShrinkL1 : public OpKernel { public: diff --git a/tensorflow/core/ops/sdca_ops.cc b/tensorflow/core/ops/sdca_ops.cc index fdf53a55dd..51d248f2d6 100644 --- a/tensorflow/core/ops/sdca_ops.cc +++ b/tensorflow/core/ops/sdca_ops.cc @@ -65,6 +65,34 @@ REGISTER_OP("SdcaOptimizer") .Output("out_delta_dense_weights: num_dense_features * float") .SetShapeFn(ApplySdcaOptimizerShapeFn); +// The SdcaOptimizerV2 op fixes the "adaptative" typo in v1. 
+REGISTER_OP("SdcaOptimizerV2") + .Attr( + "loss_type: {'logistic_loss', 'squared_loss', 'hinge_loss'," + "'smooth_hinge_loss', 'poisson_loss'}") + .Attr("adaptive : bool=false") + .Attr("num_sparse_features: int >= 0") + .Attr("num_sparse_features_with_values: int >= 0") + .Attr("num_dense_features: int >= 0") + .Attr("l1: float") + .Attr("l2: float") + .Attr("num_loss_partitions: int >= 1") + .Attr("num_inner_iterations: int >= 1") + .Input("sparse_example_indices: num_sparse_features * int64") + .Input("sparse_feature_indices: num_sparse_features * int64") + .Input("sparse_feature_values: num_sparse_features_with_values * float") + .Input("dense_features: num_dense_features * float") + .Input("example_weights: float") + .Input("example_labels: float") + .Input("sparse_indices: num_sparse_features * int64") + .Input("sparse_weights: num_sparse_features * float") + .Input("dense_weights: num_dense_features * float") + .Input("example_state_data: float") + .Output("out_example_state_data: float") + .Output("out_delta_sparse_weights: num_sparse_features * float") + .Output("out_delta_dense_weights: num_dense_features * float") + .SetShapeFn(ApplySdcaOptimizerShapeFn); + REGISTER_OP("SdcaShrinkL1") .Attr("num_features: int >= 0") .Attr("l1: float") diff --git a/tensorflow/python/ops/sdca_ops.py b/tensorflow/python/ops/sdca_ops.py index 4d5aeec591..a1c68343ed 100644 --- a/tensorflow/python/ops/sdca_ops.py +++ b/tensorflow/python/ops/sdca_ops.py @@ -29,4 +29,5 @@ from tensorflow.python.ops.gen_sdca_ops import * ops.NotDifferentiable("SdcaFprint") ops.NotDifferentiable("SdcaOptimizer") +ops.NotDifferentiable("SdcaOptimizerV2") ops.NotDifferentiable("SdcaShrinkL1") -- GitLab From 5d6adc910b8323b73a61d3089f3a3028be411e90 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 9 Oct 2018 09:33:59 -0700 Subject: [PATCH 120/411] Improve docstring for tf.data.Dataset.shuffle() PiperOrigin-RevId: 216370329 --- tensorflow/python/data/ops/dataset_ops.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index b7e19055f2..cf52f7529a 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -733,6 +733,11 @@ class Dataset(object): def shuffle(self, buffer_size, seed=None, reshuffle_each_iteration=None): """Randomly shuffles the elements of this dataset. + This dataset fills a buffer with `buffer_size` elements, then randomly + samples elements from this buffer, replacing the selected elements with new + elements. For perfect shuffling, a buffer size greater than or equal to the + full size of the dataset is required. + Args: buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the number of elements from this dataset from which the new -- GitLab From 3ef35b81fd753401e3d69989b3bd1146749cc3b3 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 9 Oct 2018 09:34:47 -0700 Subject: [PATCH 121/411] Include live-in symbols in liveness analysis. These are required for control flow conversion. PiperOrigin-RevId: 216370439 --- tensorflow/python/autograph/pyct/anno.py | 1 + tensorflow/python/autograph/pyct/cfg.py | 10 ++- .../pyct/static_analysis/liveness.py | 36 +++++--- .../pyct/static_analysis/liveness_test.py | 86 +++++++++++++++++-- 4 files changed, 112 insertions(+), 21 deletions(-) diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py index 1a52110ef3..5392e6ea03 100644 --- a/tensorflow/python/autograph/pyct/anno.py +++ b/tensorflow/python/autograph/pyct/anno.py @@ -91,6 +91,7 @@ class Static(NoValue): DEFINED_VARS_IN = ( 'Symbols defined when entering the node. 
See reaching_definitions.py.') LIVE_VARS_OUT = ('Symbols live when exiting the node. See liveness.py.') + LIVE_VARS_IN = ('Symbols live when entering the node. See liveness.py.') FAIL = object() diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py index fca0eb62e4..ec733ea38f 100644 --- a/tensorflow/python/autograph/pyct/cfg.py +++ b/tensorflow/python/autograph/pyct/cfg.py @@ -22,6 +22,10 @@ Once built, the CFG itself is immutable, but the values it holds need not be; they are usually annotated with information extracted by walking the graph. """ +# TODO(mdan): The notion of 'statements' below is inaccurate. +# They should rather be called 'block statements', because they include +# statements that may have a body, e.g. if and while. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -763,9 +767,9 @@ class AstToCfg(gast.NodeVisitor): self.builder.enter_section(node) - # TODO(mdan): Strictly speaking, this should be node.target + node.iter. - # A blind dataflow analysis would have to process both node.target and - # node.iter to properly process read and write access. + # Note: Strictly speaking, this should be node.target + node.iter. + # However, the activity analysis accounts for this inconsistency, + # so dataflow analysis produces the correct values. self.builder.enter_loop_section(node, node.iter) for stmt in node.body: self.visit(stmt) diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness.py b/tensorflow/python/autograph/pyct/static_analysis/liveness.py index 41c903beb9..36960d0103 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/liveness.py +++ b/tensorflow/python/autograph/pyct/static_analysis/liveness.py @@ -14,8 +14,13 @@ # ============================================================================== """Live variable analysis. 
-This analysis attaches a set containing the live symbols that are live at the -exit of control flow statements. +See https://en.wikipedia.org/wiki/Live_variable_analysis for a definition of +the following idioms: live variable, live in, live out, which are used +throughout this file. + +This analysis attaches the following: + * symbols that are live at the exit of control flow statements + * symbols that are live at the entry of control flow statements Requires activity analysis. """ @@ -164,23 +169,34 @@ class Annotator(transformer.Base): self.current_analyzer = parent_analyzer return node - def _aggregate_successors_live_in(self, node): + def _block_statement_live_out(self, node): successors = self.current_analyzer.graph.stmt_next[node] - node_live_out = set() + stmt_live_out = set() for s in successors: - node_live_out.update(self.current_analyzer.in_[s]) - anno.setanno(node, anno.Static.LIVE_VARS_OUT, frozenset(node_live_out)) - node = self.generic_visit(node) + stmt_live_out.update(self.current_analyzer.in_[s]) + anno.setanno(node, anno.Static.LIVE_VARS_OUT, frozenset(stmt_live_out)) + return node + + def _block_statement_live_in(self, node, entry_node): + cfg_node = self.current_analyzer.graph.index[entry_node] + stmt_live_in = frozenset(self.current_analyzer.in_[cfg_node]) + anno.setanno(node, anno.Static.LIVE_VARS_IN, stmt_live_in) return node def visit_If(self, node): - return self._aggregate_successors_live_in(node) + node = self.generic_visit(node) + node = self._block_statement_live_out(node) + return self._block_statement_live_in(node, node.test) def visit_For(self, node): - return self._aggregate_successors_live_in(node) + node = self.generic_visit(node) + node = self._block_statement_live_out(node) + return self._block_statement_live_in(node, node.iter) def visit_While(self, node): - return self._aggregate_successors_live_in(node) + node = self.generic_visit(node) + node = self._block_statement_live_out(node) + return 
self._block_statement_live_in(node, node.test) def resolve(node, source_info, graphs): diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py index 0d5f369e92..7b67f8f608 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py @@ -47,14 +47,23 @@ class LivenessTest(test.TestCase): def assertHasLiveOut(self, node, expected): live_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT) - live_out_str = set(str(v) for v in live_out) + live_out_strs = set(str(v) for v in live_out) if not expected: expected = () if not isinstance(expected, tuple): expected = (expected,) - self.assertSetEqual(live_out_str, set(expected)) + self.assertSetEqual(live_out_strs, set(expected)) - def test_stacked_if(self): + def assertHasLiveIn(self, node, expected): + live_in = anno.getanno(node, anno.Static.LIVE_VARS_IN) + live_in_strs = set(str(v) for v in live_in) + if not expected: + expected = () + if not isinstance(expected, tuple): + expected = (expected,) + self.assertSetEqual(live_in_strs, set(expected)) + + def test_live_out_stacked_if(self): def test_fn(x, a): if a > 0: @@ -69,7 +78,7 @@ class LivenessTest(test.TestCase): self.assertHasLiveOut(fn_body[0], ('a', 'x')) self.assertHasLiveOut(fn_body[1], 'x') - def test_stacked_if_else(self): + def test_live_out_stacked_if_else(self): def test_fn(x, a): if a > 0: @@ -86,7 +95,7 @@ class LivenessTest(test.TestCase): self.assertHasLiveOut(fn_body[0], 'a') self.assertHasLiveOut(fn_body[1], 'x') - def test_for_basic(self): + def test_live_out_for_basic(self): def test_fn(x, a): for i in range(a): @@ -98,7 +107,7 @@ class LivenessTest(test.TestCase): self.assertHasLiveOut(fn_body[0], 'x') - def test_attributes(self): + def test_live_out_attributes(self): def test_fn(x, a): if a > 0: @@ -110,7 +119,7 @@ class LivenessTest(test.TestCase): 
self.assertHasLiveOut(fn_body[0], ('x.y', 'x')) - def test_nested_functions(self): + def test_live_out_nested_functions(self): def test_fn(a, b): if b: @@ -126,7 +135,7 @@ class LivenessTest(test.TestCase): self.assertHasLiveOut(fn_body[0], 'a') - def test_nested_functions_isolation(self): + def test_live_out_nested_functions_isolation(self): def test_fn(b): if b: @@ -144,6 +153,67 @@ class LivenessTest(test.TestCase): self.assertHasLiveOut(fn_body[0], 'max') + def test_live_in_stacked_if(self): + + def test_fn(x, a, b, c): + if a > 0: + x = b + if c > 1: + x = 0 + return x + + node = self._parse_and_analyze(test_fn) + fn_body = node.body[0].body + + self.assertHasLiveIn(fn_body[0], ('a', 'b', 'c', 'x')) + self.assertHasLiveIn(fn_body[1], ('c', 'x')) + + def test_live_in_stacked_if_else(self): + + def test_fn(x, a, b, c, d): + if a > 1: + x = b + else: + x = c + if d > 0: + x = 0 + return x + + node = self._parse_and_analyze(test_fn) + fn_body = node.body[0].body + + self.assertHasLiveIn(fn_body[0], ('a', 'b', 'c', 'd')) + self.assertHasLiveIn(fn_body[1], ('d', 'x')) + + def test_live_in_for_basic(self): + + def test_fn(x, y, a): + for i in a: + x = i + y += x + z = 0 + return y, z + + node = self._parse_and_analyze(test_fn) + fn_body = node.body[0].body + + self.assertHasLiveIn(fn_body[0], ('a', 'y', 'z')) + + def test_live_in_for_nested(self): + + def test_fn(x, y, a): + for i in a: + for j in i: + x = i + y += x + z = j + return y, z + + node = self._parse_and_analyze(test_fn) + fn_body = node.body[0].body + + self.assertHasLiveIn(fn_body[0], ('a', 'y', 'z')) + if __name__ == '__main__': test.main() -- GitLab From 37146b89788c2a0796ca6b863bde9c4c0dc4068e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 09:46:04 -0700 Subject: [PATCH 122/411] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 216372144 --- tensorflow/go/op/wrappers.go | 222 +++++++++++++++++------------------ 1 file changed, 111 insertions(+), 111 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index fe99915a6c..eb6df2af46 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -10415,6 +10415,79 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { return op.Output(0) } +// Transforms a serialized tensorflow.TensorProto proto into a Tensor. +// +// Arguments: +// serialized: A scalar string containing a serialized TensorProto proto. +// out_type: The type of the serialized tensor. The provided type must match the +// type of the serialized tensor and no implicit conversion will take place. +// +// Returns A Tensor of type `out_type`. +func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"out_type": out_type} + opspec := tf.OpSpec{ + Type: "ParseTensor", + Input: []tf.Input{ + serialized, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. +type MaxPoolWithArgmaxAttr func(optionalAttr) + +// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. +// If not specified, defaults to DT_INT64 +func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { + return func(m optionalAttr) { + m["Targmax"] = value + } +} + +// Performs max pooling on the input and outputs both max values and indices. +// +// The indices in `argmax` are flattened, so that a maximum value at position +// `[b, y, x, c]` becomes flattened index +// `((b * height + y) * width + x) * channels + c`. 
+// +// The indices returned are always in `[0, height) x [0, width)` before flattening, +// even if padding is involved and the mathematically correct answer is outside +// (either negative or too large). This is a bug, but fixing it is difficult to do +// in a safe backwards compatible way, especially due to flattening. +// +// Arguments: +// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. +func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPoolWithArgmax", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. type ResourceSparseApplyFtrlV2Attr func(optionalAttr) @@ -14202,44 +14275,6 @@ func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Outpu return op.Output(0), op.Output(1) } -// Computes the mean along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is -// over `j` such that `segment_ids[j] == i` and `N` is the total number of -// values summed. 
-// -// If the mean is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentMean", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) @@ -15941,79 +15976,6 @@ func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, label return op.Output(0), op.Output(1) } -// Transforms a serialized tensorflow.TensorProto proto into a Tensor. -// -// Arguments: -// serialized: A scalar string containing a serialized TensorProto proto. -// out_type: The type of the serialized tensor. The provided type must match the -// type of the serialized tensor and no implicit conversion will take place. -// -// Returns A Tensor of type `out_type`. -func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - opspec := tf.OpSpec{ - Type: "ParseTensor", - Input: []tf.Input{ - serialized, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. -type MaxPoolWithArgmaxAttr func(optionalAttr) - -// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. 
-// If not specified, defaults to DT_INT64 -func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { - return func(m optionalAttr) { - m["Targmax"] = value - } -} - -// Performs max pooling on the input and outputs both max values and indices. -// -// The indices in `argmax` are flattened, so that a maximum value at position -// `[b, y, x, c]` becomes flattened index -// `((b * height + y) * width + x) * channels + c`. -// -// The indices returned are always in `[0, height) x [0, width)` before flattening, -// even if padding is involved and the mathematically correct answer is outside -// (either negative or too large). This is a bug, but fixing it is difficult to do -// in a safe backwards compatible way, especially due to flattening. -// -// Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. -func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolWithArgmax", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - // Returns the truth value of NOT x element-wise. 
func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { @@ -21926,6 +21888,44 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp return op.Output(0), op.Output(1), op.Output(2) } +// Computes the mean along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is +// over `j` such that `segment_ids[j] == i` and `N` is the total number of +// values summed. +// +// If the mean is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMean", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the minimum along segments of a tensor. // // Read -- GitLab From 1b4402137a76c8085c160edfcc0c3be3cfa8fa3a Mon Sep 17 00:00:00 2001 From: Tayo Oguntebi Date: Tue, 9 Oct 2018 10:05:11 -0700 Subject: [PATCH 123/411] Fixes typo in Sort description. PiperOrigin-RevId: 216375421 --- tensorflow/compiler/xla/client/xla_builder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 9ceede7a79..933c0e7b44 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -2002,7 +2002,7 @@ XlaOp Rev(const XlaOp& operand, absl::Span dimensions); // the last dimension is chosen by default. // // If both keys and values are provided: -// * The keys and the values must tensors with the same dimensions. The +// * The keys and the values must be tensors with the same dimensions. The // element types of the tensors may be different. 
// * The result is a tuple that consists of a sorted tensor of keys (along the // provided dimension, as above) as the first element, and a tensor with their -- GitLab From 11f32ebbdcd4eaf5e9e09fe27571e26ec0bd9dd8 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Tue, 9 Oct 2018 10:40:23 -0700 Subject: [PATCH 124/411] [tf.data vectorization] Handle captured inputs in MapVectorization optimization PiperOrigin-RevId: 216381943 --- .../optimizers/data/map_vectorization.cc | 31 +++++++++------- .../optimizers/data/vectorization_utils.cc | 35 ++++++++++++++++--- .../optimization/map_vectorization_test.py | 9 ++--- 3 files changed, 53 insertions(+), 22 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc index a9254ed58b..0576d075c2 100644 --- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc +++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc @@ -60,14 +60,24 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node, graph_utils::CopyAttribute(k, map_node, map_defun_node); } + // Note that the inputs to the function are either regular arguments (for + // which the function is mapped across their 0th dimension) or captured inputs + // (for which the function takes the argument wholesale). We can infer + // the split between these arguments from the `map_node`'s attrs. + // The Targuments attr on `map_node` corresponds to a list of types of + // MapDataset's captured inputs. 
+ auto t_captured = map_node.attr().at("Targuments"); + // Get types of input arguments from original map function - AttrValue t_args; + DataTypeVector t_args; // Regular arguments for (const auto& input : vectorized_func->signature().input_arg()) { - t_args.mutable_list()->add_type(input.type()); + t_args.push_back(input.type()); map_defun_node->add_input(input.name()); } - (*map_defun_node->mutable_attr())["Targuments"] = t_args; - AddNodeAttr("Tcaptured", DataTypeVector(), map_defun_node); + // Erase the captured arguments from Targuments + t_args.erase(t_args.end() - t_captured.list().type_size(), t_args.end()); + AddNodeAttr("Targuments", t_args, map_defun_node); + AddNodeAttr("Tcaptured", t_captured, map_defun_node); // Set return values to match output names string output_prefix = strings::StrCat(map_defun_node->name(), ":output:"); @@ -96,7 +106,9 @@ FunctionDef* AddVectorizedFunction(const NodeDef& map_node, *vectorized_func, map_defun_node, library, &result); if (!s.ok()) { - LOG(ERROR) << "VectorizeMapDefun failed: " << s; + LOG(WARNING) << "VectorizeMapDefun failed. The function will only be " + "naively vectorized with MapDefun. Reason: " + << s; return vectorized_func; } return result; @@ -129,10 +141,6 @@ bool IsStatefulFn(const FunctionLibraryDefinition& library, return false; } -bool HasCapturedInputs(const NodeDef& map_node) { - return map_node.attr().at("Targuments").list().type_size() > 0; -} - NodeDef MakeNewBatchNode(const NodeDef& old_batch_node, const NodeDef& input_node, const FunctionDef& vectorized_func, @@ -239,15 +247,12 @@ Status MapVectorization::Optimize(Cluster* cluster, const GrapplerItem& item, // Check that this is a valid optimization. if (!IsOutputShapesFullyDefined(*input_node) || !IsOutputShapesFullyDefined(*map_node) || - IsStatefulFn(function_library, *orig_func) || - HasCapturedInputs(*map_node)) { + IsStatefulFn(function_library, *orig_func)) { // 1. 
If any of the inputs have an unknown shape, don't optimize, since // inputs might not be batchable. // 2. If any of the map func outputs have an unknown shape, don't // optimize, so that batching errors surface as before. // 3. If the function is stateful, don't vectorize it. - // 4. TODO(rachelim): Make this work for MapDataset with captured inputs - // by tiling inputs or modifying the signature of MapDefun. continue; } diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc index ba857ab5d9..d977ff3198 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc @@ -253,8 +253,13 @@ Status Vectorization::AddConversionMapping(Node* op_node) { } } - TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(), - std::move(inputs), &outputs)); + Status s = vectorizer->Vectorize(*op_node, outer_scope_.get(), + std::move(inputs), &outputs); + if (!s.ok()) { + VLOG(2) << "Vectorizer for op \"" << op_node->type_string() + << "\" failed with error: " << s; + return s; + } if (op_node->num_outputs() != outputs.size()) { return errors::Internal( @@ -481,17 +486,37 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked, } Status Vectorization::AddArgNodeMappings() { - for (auto arg_node : map_defun_fn_->arg_nodes) { + // Note that inputs to map_defun_fn_ are either regular arguments (for which + // the operations are mapped across their 0th dimension) or captured inputs + // (for which the operations apply to the argument wholesale). 
+ int num_args = + map_defun_node_->attrs().Find("Targuments")->list().type_size(); + + auto add_conversion = [this](Node* arg_node, bool stacked) { Node* input_node; TF_RETURN_IF_ERROR(map_defun_node_->input_node( arg_node->attrs().Find("index")->i(), &input_node)); - conversion_map_.insert({{arg_node, 0}, {input_node, 0, true}}); + conversion_map_.insert({{arg_node, 0}, {input_node, 0, stacked}}); // Control inputs conversion_map_.insert({{arg_node, Graph::kControlSlot}, - {input_node, Graph::kControlSlot, true}}); + {input_node, Graph::kControlSlot, stacked}}); + + return Status::OK(); + }; + + // Regular arguments + for (int i = 0; i < num_args; ++i) { + TF_RETURN_IF_ERROR(add_conversion(map_defun_fn_->arg_nodes[i], true)); + } + + // Captured inputs. These are applied (without slicing) to every iteration of + // the map function, hence are mapped to unstacked nodes. + for (int i = num_args; i < map_defun_fn_->arg_nodes.size(); ++i) { + TF_RETURN_IF_ERROR(add_conversion(map_defun_fn_->arg_nodes[i], false)); } + return Status::OK(); } diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py index 971a2d94b9..803ff87924 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py @@ -105,15 +105,16 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase): def testOptimizationWithCapturedInputs(self): # Tests that vectorization works with captured inputs + y = constant_op.constant(1, shape=(2,)) + z = constant_op.constant(2, shape=(2,)) + def map_fn(x): - return x + y + return x, y, z - y = constant_op.constant(1, shape=(2,)) base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2], [3, 4]]).repeat(5) - # TODO(rachelim): when this optimization works, turn on 
expect_optimized unoptimized, optimized = self._get_test_datasets( - base_dataset, map_fn, expect_optimized=False) + base_dataset, map_fn, expect_optimized=True) self.assertDatasetsEqual(optimized, unoptimized) def testOptimizationIgnoreStateful(self): -- GitLab From aa8f428a9310b3fd8371bddf612e480b27618b2e Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 9 Oct 2018 10:47:19 -0700 Subject: [PATCH 125/411] Removing the _SHOULD_RECORD_SUMMARIES_NAME and _SUMMARY_WRITER_INIT_COLLECTION_NAME collections from the summaryV2 implementation. Replacing them with global variables. PiperOrigin-RevId: 216383152 --- tensorflow/python/ops/summary_ops_v2.py | 56 +++++++++++++------------ 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py index a404507627..18cefb8e1c 100644 --- a/tensorflow/python/ops/summary_ops_v2.py +++ b/tensorflow/python/ops/summary_ops_v2.py @@ -43,11 +43,12 @@ from tensorflow.python.training import training_util from tensorflow.python.util import tf_contextlib -# Name for a collection which is expected to have at most a single boolean -# Tensor. If this tensor is True the summary ops will record summaries. -_SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries" +# A global dictionary mapping graph keys to boolean values indicating whether +# we should record summaries for this particular graph or not. +_SHOULD_RECORD_SUMMARIES = {} -_SUMMARY_WRITER_INIT_COLLECTION_NAME = "_SUMMARY_WRITER_V2" +# A global dictionary mapping graph keys to a list of summary writer init ops. 
+_SUMMARY_WRITER_INIT_OP = {} _EXPERIMENT_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,256}$") _RUN_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,512}$") @@ -56,14 +57,9 @@ _USER_NAME_PATTERNS = re.compile(r"^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$", re.I) def should_record_summaries(): """Returns boolean Tensor which is true if summaries should be recorded.""" - should_record_collection = ops.get_collection(_SHOULD_RECORD_SUMMARIES_NAME) - if not should_record_collection: - return False - if len(should_record_collection) != 1: - raise ValueError( - "More than one tensor specified for whether summaries " - "should be recorded: %s" % should_record_collection) - return should_record_collection[0] + global _SHOULD_RECORD_SUMMARIES + key = ops.get_default_graph()._graph_key # pylint: disable=protected-access + return _SHOULD_RECORD_SUMMARIES.setdefault(key, False) # TODO(apassos) consider how to handle local step here. @@ -72,38 +68,41 @@ def record_summaries_every_n_global_steps(n, global_step=None): """Sets the should_record_summaries Tensor to true if global_step % n == 0.""" if global_step is None: global_step = training_util.get_or_create_global_step() - collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) - old = collection_ref[:] + global _SHOULD_RECORD_SUMMARIES + key = ops.get_default_graph()._graph_key # pylint: disable=protected-access + old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False) try: with ops.device("cpu:0"): - collection_ref[:] = [math_ops.equal(global_step % n, 0)] + _SHOULD_RECORD_SUMMARIES[key] = math_ops.equal(global_step % n, 0) yield finally: - collection_ref[:] = old + _SHOULD_RECORD_SUMMARIES[key] = old @tf_contextlib.contextmanager def always_record_summaries(): """Sets the should_record_summaries Tensor to always true.""" - collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) - old = collection_ref[:] + global _SHOULD_RECORD_SUMMARIES + key = ops.get_default_graph()._graph_key # pylint: 
disable=protected-access + old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False) try: - collection_ref[:] = [True] + _SHOULD_RECORD_SUMMARIES[key] = True yield finally: - collection_ref[:] = old + _SHOULD_RECORD_SUMMARIES[key] = old @tf_contextlib.contextmanager def never_record_summaries(): """Sets the should_record_summaries Tensor to always false.""" - collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) - old = collection_ref[:] + global _SHOULD_RECORD_SUMMARIES + key = ops.get_default_graph()._graph_key # pylint: disable=protected-access + old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False) try: - collection_ref[:] = [False] + _SHOULD_RECORD_SUMMARIES[key] = False yield finally: - collection_ref[:] = old + _SHOULD_RECORD_SUMMARIES[key] = old class SummaryWriter(object): @@ -143,7 +142,6 @@ class SummaryWriter(object): finally: context.context().summary_writer_resource = old - def init(self): """Operation to initialize the summary writer resource.""" if self._resource is not None: @@ -311,7 +309,9 @@ def _make_summary_writer(name, factory, **kwargs): if not context.executing_eagerly(): # TODO(apassos): Consider doing this instead. 
# ops.get_default_session().run(init_op) - ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, init_op) + global _SUMMARY_WRITER_INIT_OP + key = ops.get_default_graph()._graph_key # pylint: disable=protected-access + _SUMMARY_WRITER_INIT_OP.setdefault(key, []).append(init_op) return SummaryWriter(resource, init_op_fn) @@ -352,7 +352,9 @@ def summary_writer_initializer_op(): raise RuntimeError( "tf.contrib.summary.summary_writer_initializer_op is only " "supported in graph mode.") - return ops.get_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME) + global _SUMMARY_WRITER_INIT_OP + key = ops.get_default_graph()._graph_key # pylint: disable=protected-access + return _SUMMARY_WRITER_INIT_OP.setdefault(key, []) def summary_writer_function(name, tensor, function, family=None): -- GitLab From 3e8af7ea6b70104b05be22797451d0218c9e5262 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Tue, 9 Oct 2018 10:58:03 -0700 Subject: [PATCH 126/411] Internal change. PiperOrigin-RevId: 216385202 --- .../lite/testing/model_coverage/model_coverage_lib.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py index 72029ed03c..ab29f71138 100644 --- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py +++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py @@ -297,7 +297,7 @@ def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs): compare_models_random_data(tflite_model, tf_eval_func) -def test_keras_model(filename, **kwargs): +def test_keras_model(filename, input_arrays=None, input_shapes=None, **kwargs): """Validates the tf.keras model converts to a TFLite model. 
Converts the tf.keras model to TFLite and checks the accuracy of the model on @@ -305,9 +305,15 @@ def test_keras_model(filename, **kwargs): Args: filename: Full filepath of HDF5 file containing the tf.keras model. + input_arrays: List of input tensors to freeze graph with. + input_shapes: Dict of strings representing input tensor names to list of + integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}). + Automatically determined when input shapes is None (e.g., {"foo" : None}). + (default None) **kwargs: Additional arguments to be passed into the converter. """ - converter = _lite.TFLiteConverter.from_keras_model_file(filename) + converter = _lite.TFLiteConverter.from_keras_model_file( + filename, input_arrays=input_arrays, input_shapes=input_shapes) tflite_model = _convert(converter, **kwargs) tf_eval_func = evaluate_keras_model(filename) -- GitLab From 1e4a3baad388b5d5250efdb19f91d5b670816fbe Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Tue, 9 Oct 2018 11:03:57 -0700 Subject: [PATCH 127/411] Update TFLite Converter documentation. PiperOrigin-RevId: 216386450 --- tensorflow/contrib/lite/toco/README.md | 9 +- .../lite/toco/g3doc/cmdline_examples.md | 66 ++++++------- .../lite/toco/g3doc/cmdline_reference.md | 8 +- .../contrib/lite/toco/g3doc/python_api.md | 95 ++++++++++--------- 4 files changed, 93 insertions(+), 85 deletions(-) diff --git a/tensorflow/contrib/lite/toco/README.md b/tensorflow/contrib/lite/toco/README.md index 2db6a627ab..91f6f618a3 100644 --- a/tensorflow/contrib/lite/toco/README.md +++ b/tensorflow/contrib/lite/toco/README.md @@ -1,6 +1,6 @@ -# TOCO: TensorFlow Lite Optimizing Converter +# TensorFlow Lite Converter -The TensorFlow Lite Optimizing Converter converts TensorFlow graphs into +The TensorFlow Lite Converter converts TensorFlow graphs into TensorFlow Lite graphs. There are additional usages that are also detailed in the usage documentation. 
@@ -14,9 +14,10 @@ Usage information is given in these documents: ## Where the converter fits in the TensorFlow landscape -Once an application developer has a trained TensorFlow model, TOCO will accept +Once an application developer has a trained TensorFlow model, the TensorFlow +Lite Converter will accept that model and generate a TensorFlow Lite -[FlatBuffer](https://google.github.io/flatbuffers/) file. TOCO currently supports +[FlatBuffer](https://google.github.io/flatbuffers/) file. The converter currently supports [SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators), frozen graphs (models generated via [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)), diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md index aba7536cbd..e3c46eb377 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md @@ -1,7 +1,7 @@ -# TensorFlow Lite Optimizing Converter command-line examples +# TensorFlow Lite Converter command-line examples -This page provides examples on how to use TOCO via command line. It is -complemented by the following documents: +This page shows how to use the TensorFlow Lite Converter in the command line. It +is complemented by the following documents: * [README](../README.md) * [Command-line glossary](cmdline_reference.md) @@ -10,7 +10,7 @@ complemented by the following documents: Table of contents: * [Command-line tools](#tools) - * [Converting models prior to TensorFlow 1.9.](#pre-tensorflow-1.9) + * [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9) * [Basic examples](#basic) * [Convert a TensorFlow GraphDef](#graphdef) * [Convert a TensorFlow SavedModel](#savedmodel) @@ -31,27 +31,28 @@ Table of contents: ## Command-line tools
-There are two approaches to running TOCO via command line. +There are two approaches to running the converter in the command line. * `tflite_convert`: Starting from TensorFlow 1.9, the command-line tool - `tflite_convert` will be installed as part of the Python package. All of the + `tflite_convert` is installed as part of the Python package. All of the examples below use `tflite_convert` for simplicity. * Example: `tflite_convert --output_file=...` -* `bazel`: In order to run the latest version of TOCO, [clone the TensorFlow - repository](https://www.tensorflow.org/install/source) - and use `bazel`. This is the recommended approach for converting models that - utilize new features that were not supported by TOCO in TensorFlow 1.9. +* `bazel`: In order to run the latest version of the TensorFlow Lite Converter + either install the nightly build using + [pip](https://www.tensorflow.org/install/pip) or + [clone the TensorFlow repository](https://www.tensorflow.org/install/source) + and use `bazel`. * Example: `bazel run //tensorflow/contrib/lite/python:tflite_convert -- --output_file=...` -### Converting models prior to TensorFlow 1.9. +### Converting models prior to TensorFlow 1.9 -The recommended approach for using TOCO prior to TensorFlow 1.9 is the [Python -API](python_api.md#pre-tensorflow-1.9). If a command line tool is desired, the -`toco` command line tool was available in TensorFlow 1.7. Enter `toco --help` in -Terminal for additional details on the command-line flags available. There were -no command line tools in TensorFlow 1.8. +The recommended approach for using the converter prior to TensorFlow 1.9 is the +[Python API](python_api.md#pre-tensorflow-1.9). If a command line tool is +desired, the `toco` command line tool was available in TensorFlow 1.7. Enter +`toco --help` in Terminal for additional details on the command-line flags +available. There were no command line tools in TensorFlow 1.8. 
## Basic examples @@ -117,9 +118,9 @@ tflite_convert \ ### Convert a TensorFlow GraphDef for quantized inference -TOCO is compatible with fixed point quantization models described -[here](https://www.tensorflow.org/performance/quantization). These are float -models with +The TensorFlow Lite Converter is compatible with fixed point quantization models +described [here](https://www.tensorflow.org/performance/quantization). These are +float models with [`FakeQuant*`](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization) ops inserted at the boundaries of fused layers to record min-max range information. This generates a quantized inference workload that reproduces the @@ -141,12 +142,12 @@ tflite_convert \ ### Use \"dummy-quantization\" to try out quantized inference on a float graph -In order to evaluate the possible benefit of generating a quantized graph, TOCO -allows "dummy-quantization" on float graphs. The flags `--default_ranges_min` -and `--default_ranges_max` accept plausible values for the min-max ranges of the -values in all arrays that do not have min-max information. "Dummy-quantization" -will produce lower accuracy but will emulate the performance of a correctly -quantized model. +In order to evaluate the possible benefit of generating a quantized graph, the +converter allows "dummy-quantization" on float graphs. The flags +`--default_ranges_min` and `--default_ranges_max` accept plausible values for +the min-max ranges of the values in all arrays that do not have min-max +information. "Dummy-quantization" will produce lower accuracy but will emulate +the performance of a correctly quantized model. The example below contains a model using Relu6 activation functions. Therefore, a reasonable guess is that most activation ranges should be contained in [0, 6]. 
@@ -207,10 +208,10 @@ tflite_convert \ ### Specifying subgraphs Any array in the input file can be specified as an input or output array in -order to extract subgraphs out of an input graph file. TOCO discards the parts -of the graph outside of the specific subgraph. Use [graph -visualizations](#graph-visualizations) to identify the input and output arrays -that make up the desired subgraph. +order to extract subgraphs out of an input graph file. The TensorFlow Lite +Converter discards the parts of the graph outside of the specific subgraph. Use +[graph visualizations](#graph-visualizations) to identify the input and output +arrays that make up the desired subgraph. The follow command shows how to extract a single fused layer out of a TensorFlow GraphDef. @@ -247,9 +248,10 @@ function tends to get fused). ## Graph visualizations -TOCO can export a graph to the Graphviz Dot format for easy visualization via -either the `--output_format` flag or the `--dump_graphviz_dir` flag. The -subsections below outline the use cases for each. +The converter can export a graph to the Graphviz Dot format for easy +visualization using either the `--output_format` flag or the +`--dump_graphviz_dir` flag. The subsections below outline the use cases for +each. ### Using `--output_format=GRAPHVIZ_DOT` diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md index 00bc8d4ccb..31200fd657 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md @@ -1,8 +1,8 @@ -# TensorFlow Lite Optimizing Converter command-line glossary +# TensorFlow Lite Converter command-line glossary -This page is complete reference of command-line flags used by TOCO's command -line starting from TensorFlow 1.9 up until the most recent build of TensorFlow. 
-It is complemented by the following other documents: +This page is a complete reference of command-line flags used by the TensorFlow +Lite Converter's command line starting from TensorFlow 1.9 up until the most +recent build of TensorFlow. It is complemented by the following other documents: * [README](../README.md) * [Command-line examples](cmdline_examples.md) diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md index 8c31c3dca8..1f741360c6 100644 --- a/tensorflow/contrib/lite/toco/g3doc/python_api.md +++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md @@ -1,7 +1,8 @@ -# TensorFlow Lite Optimizing Converter & Interpreter Python API reference +# TensorFlow Lite Converter & Interpreter Python API reference -This page provides examples on how to use TOCO and the TensorFlow Lite -interpreter via the Python API. It is complemented by the following documents: +This page provides examples on how to use the TensorFlow Lite Converter and the +TensorFlow Lite interpreter using the Python API. It is complemented by the +following documents: * [README](../README.md) * [Command-line examples](cmdline_examples.md) @@ -23,39 +24,35 @@ Table of contents: * [Using the interpreter from model data](#interpreter-data) * [Additional instructions](#additional-instructions) * [Build from source code](#latest-package) - * [Converting models prior to TensorFlow 1.9.](#pre-tensorflow-1.9) + * [Converting models in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11) + * [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9) ## High-level overview -While the TensorFlow Lite Optimizing Converter can be used from the command -line, it is often convenient to use it as part of a Python model build and -training script. This is so that conversion can be part of your model -development pipeline. This allows you to know early and often that you are -designing a model that can be targeted to devices with mobile.
+While the TensorFlow Lite Converter can be used from the command line, it is +often convenient to use in a Python script as part of the model development +pipeline. This allows you to know early that you are designing a model that can +be targeted to mobile devices. ## API The API for converting TensorFlow models to TensorFlow Lite as of TensorFlow 1.9 -is `tf.contrib.lite.TocoConverter`. The API for calling the Python intepreter is -`tf.contrib.lite.Interpreter`. - -**NOTE**: As of TensorFlow 1.12, the API for converting TensorFlow models to -TFLite will be renamed to `TFLiteConverter`. `TFLiteConverter` is semantically -identically to `TocoConverter`. The API is available at -`tf.contrib.lite.TFLiteConverter` as of the Sept 26 `tf-nightly`. - -`TocoConverter` provides class methods based on the original format of the -model. `TocoConverter.from_session()` is available for GraphDefs. -`TocoConverter.from_saved_model()` is available for SavedModels. -`TocoConverter.from_keras_model_file()` is available for `tf.Keras` files. +is `tf.contrib.lite.TFLiteConverter`. The API for calling the Python interpreter +is `tf.contrib.lite.Interpreter`. + +Note: See the "Additional Instructions" sections for converting TensorFlow +models to TensorFlow Lite +[in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11) and +[prior to TensorFlow 1.9](#pre-tensorflow-1.9). + +`TFLiteConverter` provides class methods based on the original format of the +model. `TFLiteConverter.from_session()` is available for GraphDefs. +`TFLiteConverter.from_saved_model()` is available for SavedModels. +`TFLiteConverter.from_keras_model_file()` is available for `tf.Keras` files. Example usages for simple float-point models are shown in [Basic Examples](#basic). Examples usages for more complex models is shown in [Complex Examples](#complex). -**NOTE**: Currently, `TocoConverter` will cause a fatal error to the Python -interpreter when the conversion fails.
This will be remedied as soon as -possible. - ## Basic examples The following section shows examples of how to convert a basic float-point model @@ -76,7 +73,7 @@ out = tf.identity(val, name="out") with tf.Session() as sess: sess.run(tf.global_variables_initializer()) - converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out]) tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` @@ -89,7 +86,7 @@ TensorFlow Lite FlatBuffer when the GraphDef is stored in a file. Both `.pb` and The example uses [Mobilenet_1.0_224](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz). -The function only supports GraphDefs frozen via +The function only supports GraphDefs frozen using [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py). ```python @@ -99,7 +96,7 @@ graph_def_file = "/path/to/Downloads/mobilenet_v1_1.0_224/frozen_graph.pb" input_arrays = ["input"] output_arrays = ["MobilenetV1/Predictions/Softmax"] -converter = tf.contrib.lite.TocoConverter.from_frozen_graph( +converter = tf.contrib.lite.TFLiteConverter.from_frozen_graph( graph_def_file, input_arrays, output_arrays) tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) @@ -113,25 +110,26 @@ FlatBuffer. 
```python import tensorflow as tf -converter = tf.contrib.lite.TocoConverter.from_saved_model(saved_model_dir) +converter = tf.contrib.lite.TFLiteConverter.from_saved_model(saved_model_dir) tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` For more complex SavedModels, the optional parameters that can be passed into -`TocoConverter.from_saved_model()` are `input_arrays`, `input_shapes`, +`TFLiteConverter.from_saved_model()` are `input_arrays`, `input_shapes`, `output_arrays`, `tag_set` and `signature_key`. Details of each parameter are -available by running `help(tf.contrib.lite.TocoConverter)`. +available by running `help(tf.contrib.lite.TFLiteConverter)`. ### Exporting a tf.keras File The following example shows how to convert a `tf.keras` model into a TensorFlow -Lite FlatBuffer. +Lite FlatBuffer. This example requires +[`h5py`](http://docs.h5py.org/en/latest/build.html) to be installed. ```python import tensorflow as tf -converter = tf.contrib.lite.TocoConverter.from_keras_model_file("keras_model.h5") +converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file("keras_model.h5") tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` @@ -163,7 +161,7 @@ keras_file = "keras_model.h5" tf.keras.models.save_model(model, keras_file) # Convert to TensorFlow Lite model. -converter = tf.contrib.lite.TocoConverter.from_keras_model_file(keras_file) +converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file(keras_file) tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` @@ -173,7 +171,7 @@ open("converted_model.tflite", "wb").write(tflite_model) For models where the default value of the attributes is not sufficient, the attribute's values should be set before calling `convert()`. In order to call any constants use `tf.contrib.lite.constants.` as seen below with -`QUANTIZED_UINT8`. 
Run `help(tf.contrib.lite.TocoConverter)` in the Python +`QUANTIZED_UINT8`. Run `help(tf.contrib.lite.TFLiteConverter)` in the Python terminal for detailed documentation on the attributes. Although the examples are demonstrated on GraphDefs containing only constants. @@ -193,7 +191,7 @@ val = img + const out = tf.fake_quant_with_min_max_args(val, min=0., max=1., name="output") with tf.Session() as sess: - converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out]) converter.inference_type = tf.contrib.lite.constants.QUANTIZED_UINT8 input_arrays = converter.get_input_arrays() converter.quantized_input_stats = {input_arrays[0] : (0., 1.)} # mean, std_dev @@ -250,7 +248,7 @@ val = img + const out = tf.identity(val, name="out") with tf.Session() as sess: - converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out]) tflite_model = converter.convert() # Load TFLite model and allocate tensors. @@ -262,13 +260,20 @@ interpreter.allocate_tensors() ### Build from source code -In order to run the latest version of the TOCO Python API, clone the TensorFlow -repository, configure the installation, and build and install the pip package. -Detailed instructions are available -[here](https://www.tensorflow.org/install/source). +In order to run the latest version of the TensorFlow Lite Converter Python API, +either install the nightly build with +[pip](https://www.tensorflow.org/install/pip) (recommended) or +[Docker](https://www.tensorflow.org/install/docker), or +[build the pip package from source](https://www.tensorflow.org/install/source). + +### Converting models in TensorFlow 1.9 to TensorFlow 1.11 + +To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.9 through +TensorFlow 1.11, use `TocoConverter`. `TocoConverter` is semantically +identically to `TFLiteConverter`. 
-### Converting models prior to TensorFlow 1.9. +### Converting models prior to TensorFlow 1.9 -To use TOCO in TensorFlow 1.7 and TensorFlow 1.8, use the `toco_convert` -function. Run `help(tf.contrib.lite.toco_convert)` to get details about accepted -parameters. +To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.7 and TensorFlow +1.8, use the `toco_convert` function. Run `help(tf.contrib.lite.toco_convert)` +to get details about accepted parameters. -- GitLab From 84ace0358526bb51c04a3bef4b3072b93b9d1bec Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 9 Oct 2018 11:16:32 -0700 Subject: [PATCH 128/411] Improves tf.function prototype. Specifically: - renames from def_function - returns an object with well-defined methods - doesn't force-retrace twice - uses the python descriptor API ( https://docs.python.org/3/howto/descriptor.html ) to remove the need for a tf.method PiperOrigin-RevId: 216388957 --- tensorflow/python/eager/def_function.py | 188 +++++++++++++++---- tensorflow/python/eager/def_function_test.py | 32 +++- 2 files changed, 179 insertions(+), 41 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 8dcacd5c99..b23891d394 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -19,8 +19,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools +import weakref + from tensorflow.python.eager import context -from tensorflow.python.eager import function +from tensorflow.python.eager import function as function_lib from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import resource_variable_ops @@ -165,71 +168,184 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable): self._cached_shape_as_list = None -def _defun_with_scope(scope, fn): +def _defun_with_scope(scope, 
fn, input_signature): def wrapped_fn(*args, **kwds): with variable_scope.variable_creator_scope(scope): return fn(*args, **kwds) - return function.defun(wrapped_fn) + return function_lib.defun(wrapped_fn, input_signature=input_signature) -def def_function(fn): - """Defines a function as per the "functions, not sessions" document.""" +def _call_concrete(fn, args, unused_kwargs): + """Calls the given concrete function with only the tensor arguments.""" + + def inner(): + # TODO(apassos) figure out what to do with kwargs and concrete functions. + return fn(*[x for x in args if isinstance(x, ops.Tensor)]) + + return inner + + +class PolymorphicFunction(object): + """Wrapper class for the graph functions defined for a Python function. + + See the documentation for `tf.function` for more information on the semantics + of defined functions. - # Wrapping the values in lists to bypass python's lack of way to mutate - # symbols from an outer scope. - first_call = [True] - function_to_call = [] + PolymorphicFunction is thread-compatible. + """ + + def __init__(self, + python_function, + input_signature=None,): + """Initializes a polymorphic function. + + Args: + python_function: the function to be wrapped. + input_signature: a possibly nested sequence of `TensorSpec` objects + specifying the input signature of this function. If `None`, a separate + function is instantiated for each inferred input signature. + + Raises: + ValueError: if `input_signature` is not None and the `python_function`'s + argspec has keyword arguments. + """ + self._python_function = python_function + self._input_signature = input_signature + self._created_variables = None + self._stateful_fn = None + self._descriptor_cache = weakref.WeakKeyDictionary() - # TODO(apassos) represent this as an object and not as a closure. 
- def decorated_fn(*args, **kwds): - """Graph function for fn.""" - if not first_call[0]: - return function_to_call[0](*args, **kwds) + def _initialize(self, args, kwds): + """Initializes, on the first call.""" - first_call[0] = False - created_variables = [] + self._created_variables = [] - def variable_creator_scope(unused_next_creator, **kwds): + def variable_capturing_scope(unused_next_creator, **kwds): """Creates UnliftedInitializerVariables and saves references to them.""" v = UnliftedInitializerVariable(**kwds) - created_variables.append(v) + self._created_variables.append(v) return v - first_graph_function = _defun_with_scope(variable_creator_scope, fn) + self._stateful_fn = _defun_with_scope( + variable_capturing_scope, self._python_function, self._input_signature) # Force the definition of the function for these arguments - first_concrete = first_graph_function.get_concrete_function(*args, **kwds) + self._concrete_stateful_fn = self._stateful_fn.get_concrete_function( + *args, **kwds) def invalid_creator_scope(*unused_args, **unused_kwds): """Disables variable creation.""" raise ValueError( - "def_function-decorated function tried to create " - "variables on second call.") + "tf.function-decorated function tried to create " + "variables on non-first call.") - second_graph_function = _defun_with_scope(invalid_creator_scope, fn) + self._stateless_fn = _defun_with_scope( + invalid_creator_scope, self._python_function, self._input_signature) - function_to_call.append(second_graph_function) - if not created_variables: - # Note: this retracing might be unnecessary, but running the function - # forever in the scope which disallows variable creation is safer than not - # doing so. 
- return second_graph_function(*args, **kwds) + def __call__(self, *args, **kwds): + """Calls the graph function.""" + if self._created_variables: + # In this case we have created variables on the first call, so we run the + # defunned version which is guaranteed to never create variables. + return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable + elif self._stateful_fn is not None: + # In this case we have not created variables on the first call. So we can + # run the first trace but we should fail if variables are created. + results = self._first_trace(*args, **kwds) + if self._created_variables: + raise ValueError("Creating variables on a non-first call to a function" + " decorated with tf.function.") + return results + + self._initialize(args, kwds) + + if not self._created_variables: + # If we did not create any variables the trace we have is good enough. + return _call_concrete(self._concrete_stateful_fn, args, kwds)() def fn_with_cond(*inner_args, **inner_kwds): """Conditionally runs initialization if it's needed.""" condition = True - for variable in created_variables: + for variable in self._created_variables: condition = condition and resource_variable_ops.var_is_initialized_op( variable.handle) - # We want to call second_graph_function if possible because it avoids - # recomputing potentially expensive initializers. + # We want to call stateless_fn if possible because it avoids recomputing + # potentially expensive initializers. 
return control_flow_ops.cond( condition, - lambda: second_graph_function(*inner_args, **inner_kwds), - lambda: first_concrete(*inner_args, **inner_kwds)) + lambda: self._stateless_fn(*inner_args, **inner_kwds), + _call_concrete(self._concrete_stateful_fn, inner_args, inner_kwds)) + + return function_lib.defun(fn_with_cond)(*args, **kwds) + + @property + def python_function(self): + """The python function wrapped in this tf.function.""" + return self._python_function + + def get_concrete_function(self, *args, **kwargs): + """Returns a `Function` object specialized to inputs and execution context. + + `args` and `kwargs` are ignored if this `PolymorphicFunction` was created + with an `input_signature`. + + Args: + *args: inputs to specialize on. + **kwargs: inputs to specialize on. - return function.defun(fn_with_cond)(*args, **kwds) + Raises: + ValueError: if this object has not yet been called on concrete values. + """ + # TODO(apassos) figure out how to handle this case (what should we return + # here?) + if self._stateful_fn is None: + raise ValueError( + "Call this function with concrete values before asking for a" + " concrete function. Calling the function will ensure that, in" + " case this function creates variables, that those are properly" + " initialized.") + if self._created_variables: + # In this case we have created variables on the first call, so we run the + # defunned version which is guaranteed to never create variables. + return self._stateless_fn.get_concrete_function(*args, **kwargs) + elif self._stateful_fn is not None: + # In this case we have not created variables on the first call. So we can + # run the first trace but we should fail if variables are created. 
+ concrete = self._first_trace.get_concrete_function(*args, **kwargs) + if self._created_variables: + raise ValueError("Creating variables on a non-first call to a function" + " decorated with tf.function.") + return concrete - return decorated_fn + def __get__(self, instance, owner): + """Makes it possible to defun instance methods.""" + del owner + # `instance` here is the instance that this `PolymorphicFunction` was + # accessed through; e.g., for + # + # class Foo(object): + # + # @function.defun + # def bar(self): + # ... + # + # foo = Foo() + # foo.bar() # `foo.bar` is a `PolymorphicFunction` instance + # + # then `instance` will be `foo` (and `owner` will be `Foo`). We create a + # new instance of PolymorphicFunction here to allow different instances each + # to create variables once, thereby allowing methods to be decorated with + # tf.function. Keeps a cache to avoid retracing the function every time the + # descriptor is accessed. + if instance not in self._descriptor_cache: + self._descriptor_cache[instance] = PolymorphicFunction( + functools.partial(self.python_function, instance), + self._input_signature) + return self._descriptor_cache[instance] + + +def function(fn=None, input_signature=None): + """Defines a function as per the "functions, not sessions" document.""" + return PolymorphicFunction(fn, input_signature) diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py index 804436c4bb..39bad726d0 100644 --- a/tensorflow/python/eager/def_function_test.py +++ b/tensorflow/python/eager/def_function_test.py @@ -29,7 +29,7 @@ class DefFunctionTest(test.TestCase): def testNoVariables(self): - @def_function.def_function + @def_function.function def fn(x): return 2 * x @@ -37,7 +37,7 @@ class DefFunctionTest(test.TestCase): def testFailIfVariablesAreCreatedMoreThanOnce(self): - @def_function.def_function + @def_function.function def fn(x): return variables.Variable(1.0) + x @@ -47,7 +47,7 @@ class 
DefFunctionTest(test.TestCase): def testFailIfVariablesAreCreatedMoreThanOnceNoWeakRef(self): state = [] - @def_function.def_function + @def_function.function def fn(x): state.append(variables.Variable(1.0)) return state[-1] + x @@ -59,7 +59,7 @@ class DefFunctionTest(test.TestCase): state = [] - @def_function.def_function + @def_function.function def fn(x): if not state: state.append(variables.Variable(2.0)) @@ -72,7 +72,7 @@ class DefFunctionTest(test.TestCase): state = [] - @def_function.def_function + @def_function.function def fn(x): if not state: state.append(variables.Variable(2.0 * x)) @@ -81,6 +81,28 @@ class DefFunctionTest(test.TestCase): self.assertAllEqual(fn(constant_op.constant(1.0)), 2.0) self.assertAllEqual(fn(constant_op.constant(3.0)), 6.0) + def testMethod(self): + + class MyModel(object): + + def __init__(self): + self.var = None + + @def_function.function + def apply(self, x): + if self.var is None: + self.var = variables.Variable(2.0) + return self.var * x + + m0 = MyModel() + self.assertAllEqual(m0.apply(3.0), 6.0) + # Calling twice to exercise that we do not recreate variables. + m0.var.assign(3.0) + self.assertAllEqual(m0.apply(3.0), 9.0) + + m1 = MyModel() + self.assertAllEqual(m1.apply(3.0), 6.0) + if __name__ == '__main__': ops.enable_eager_execution() -- GitLab From 931353c5f79c2d419afb3a5ecac59184c5558351 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 11:37:29 -0700 Subject: [PATCH 129/411] Update ops-related pbtxt files. 
PiperOrigin-RevId: 216392772 --- .../core/ops/compat/ops_history.v1.pbtxt | 119 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 119 ++++++++++++++++++ 2 files changed, 238 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 9df0ece69b..dcea70dffb 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -58500,6 +58500,125 @@ op { minimum: 1 } } +op { + name: "SdcaOptimizerV2" + input_arg { + name: "sparse_example_indices" + type: DT_INT64 + number_attr: "num_sparse_features" + } + input_arg { + name: "sparse_feature_indices" + type: DT_INT64 + number_attr: "num_sparse_features" + } + input_arg { + name: "sparse_feature_values" + type: DT_FLOAT + number_attr: "num_sparse_features_with_values" + } + input_arg { + name: "dense_features" + type: DT_FLOAT + number_attr: "num_dense_features" + } + input_arg { + name: "example_weights" + type: DT_FLOAT + } + input_arg { + name: "example_labels" + type: DT_FLOAT + } + input_arg { + name: "sparse_indices" + type: DT_INT64 + number_attr: "num_sparse_features" + } + input_arg { + name: "sparse_weights" + type: DT_FLOAT + number_attr: "num_sparse_features" + } + input_arg { + name: "dense_weights" + type: DT_FLOAT + number_attr: "num_dense_features" + } + input_arg { + name: "example_state_data" + type: DT_FLOAT + } + output_arg { + name: "out_example_state_data" + type: DT_FLOAT + } + output_arg { + name: "out_delta_sparse_weights" + type: DT_FLOAT + number_attr: "num_sparse_features" + } + output_arg { + name: "out_delta_dense_weights" + type: DT_FLOAT + number_attr: "num_dense_features" + } + attr { + name: "loss_type" + type: "string" + allowed_values { + list { + s: "logistic_loss" + s: "squared_loss" + s: "hinge_loss" + s: "smooth_hinge_loss" + s: "poisson_loss" + } + } + } + attr { + name: "adaptive" + type: "bool" + default_value { + b: false + } + } + attr { + name: 
"num_sparse_features" + type: "int" + has_minimum: true + } + attr { + name: "num_sparse_features_with_values" + type: "int" + has_minimum: true + } + attr { + name: "num_dense_features" + type: "int" + has_minimum: true + } + attr { + name: "l1" + type: "float" + } + attr { + name: "l2" + type: "float" + } + attr { + name: "num_loss_partitions" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_inner_iterations" + type: "int" + has_minimum: true + minimum: 1 + } +} op { name: "SdcaShrinkL1" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 2048ad26ac..93a297458f 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -28140,6 +28140,125 @@ op { minimum: 1 } } +op { + name: "SdcaOptimizerV2" + input_arg { + name: "sparse_example_indices" + type: DT_INT64 + number_attr: "num_sparse_features" + } + input_arg { + name: "sparse_feature_indices" + type: DT_INT64 + number_attr: "num_sparse_features" + } + input_arg { + name: "sparse_feature_values" + type: DT_FLOAT + number_attr: "num_sparse_features_with_values" + } + input_arg { + name: "dense_features" + type: DT_FLOAT + number_attr: "num_dense_features" + } + input_arg { + name: "example_weights" + type: DT_FLOAT + } + input_arg { + name: "example_labels" + type: DT_FLOAT + } + input_arg { + name: "sparse_indices" + type: DT_INT64 + number_attr: "num_sparse_features" + } + input_arg { + name: "sparse_weights" + type: DT_FLOAT + number_attr: "num_sparse_features" + } + input_arg { + name: "dense_weights" + type: DT_FLOAT + number_attr: "num_dense_features" + } + input_arg { + name: "example_state_data" + type: DT_FLOAT + } + output_arg { + name: "out_example_state_data" + type: DT_FLOAT + } + output_arg { + name: "out_delta_sparse_weights" + type: DT_FLOAT + number_attr: "num_sparse_features" + } + output_arg { + name: "out_delta_dense_weights" + type: DT_FLOAT + number_attr: "num_dense_features" + } + attr { + name: "loss_type" 
+ type: "string" + allowed_values { + list { + s: "logistic_loss" + s: "squared_loss" + s: "hinge_loss" + s: "smooth_hinge_loss" + s: "poisson_loss" + } + } + } + attr { + name: "adaptive" + type: "bool" + default_value { + b: false + } + } + attr { + name: "num_sparse_features" + type: "int" + has_minimum: true + } + attr { + name: "num_sparse_features_with_values" + type: "int" + has_minimum: true + } + attr { + name: "num_dense_features" + type: "int" + has_minimum: true + } + attr { + name: "l1" + type: "float" + } + attr { + name: "l2" + type: "float" + } + attr { + name: "num_loss_partitions" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_inner_iterations" + type: "int" + has_minimum: true + minimum: 1 + } +} op { name: "SdcaShrinkL1" input_arg { -- GitLab From 12e164d1e7c0b197f06d5d3c2ed26318b89b5e4c Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Tue, 9 Oct 2018 11:38:15 -0700 Subject: [PATCH 130/411] Return ::tensorflow::Status in Toco Graph Transformations. 
PiperOrigin-RevId: 216392908 --- .../convert_expanddims_to_reshape.cc | 16 +++++---- .../convert_pure_conv_to_depthwise.cc | 24 +++++++------ .../convert_reorder_axes.cc | 15 +++++--- .../convert_squeeze_to_reshape.cc | 18 ++++++---- .../convert_trivial_addn_to_add.cc | 12 ++++--- .../convert_trivial_pack_to_reshape.cc | 16 +++++---- .../convert_trivial_tile_to_concat.cc | 16 +++++---- .../convert_trivial_transpose_to_reshape.cc | 16 +++++---- .../create_im2col_arrays.cc | 12 ++++--- .../toco/graph_transformations/dequantize.cc | 14 +++++--- .../graph_transformations/drop_fake_quant.cc | 13 ++++--- .../drop_im2col_arrays.cc | 11 +++--- .../ensure_bias_vectors.cc | 9 +++-- ...int8_weights_safe_for_fast_int8_kernels.cc | 14 ++++---- .../fuse_activation_functions.cc | 22 +++++++----- .../fuse_binary_into_following_affine.cc | 32 +++++++++-------- .../fuse_binary_into_preceding_affine.cc | 36 ++++++++++--------- .../fuse_broadcast_into_following_binary.cc | 16 +++++---- .../graph_transformations.cc | 2 +- .../graph_transformations.h | 29 +++++++++------ .../graph_transformations/hardcode_min_max.cc | 7 ++-- .../identify_dilated_conv.cc | 16 +++++---- .../identify_l2_normalization.cc | 22 +++++++----- .../graph_transformations/identify_l2_pool.cc | 15 ++++---- .../graph_transformations/identify_lstm.cc | 33 +++++++++-------- .../identify_lstm_merge_inputs.cc | 16 +++++---- .../identify_lstm_split_inputs.cc | 16 +++++---- .../graph_transformations/identify_prelu.cc | 19 +++++----- .../graph_transformations/identify_relu1.cc | 17 +++++---- .../make_initial_dequantize_operator.cc | 8 +++-- .../merge_reshape_into_preceding_transpose.cc | 26 +++++++------- .../move_binary_operator_before_reshape.cc | 30 +++++++++------- ...gate_activation_function_into_constants.cc | 20 ++++++----- .../propagate_array_data_types.cc | 18 ++++++---- .../propagate_default_min_max.cc | 8 +++-- .../propagate_fake_quant_num_bits.cc | 12 ++++--- .../propagate_fixed_sizes.cc | 12 ++++--- 
.../toco/graph_transformations/quantize.cc | 13 ++++--- ...minmax_and_narrow_range_from_fake_quant.cc | 12 ++++--- .../remove_final_dequantize_op.cc | 12 ++++--- .../remove_tensorflow_assert.cc | 10 ++++-- .../remove_tensorflow_identity.cc | 10 ++++-- .../remove_trivial_binary.cc | 22 +++++++----- .../remove_trivial_concatenation.cc | 12 ++++--- .../remove_trivial_concatenation_input.cc | 12 ++++--- .../remove_trivial_fake_quant.cc | 12 ++++--- ...emove_trivial_quantized_activation_func.cc | 15 ++++---- .../remove_trivial_quantized_min_max.cc | 12 ++++--- .../remove_trivial_reshape.cc | 12 ++++--- .../remove_trivial_slice.cc | 11 +++--- .../graph_transformations/remove_unused_op.cc | 15 ++++---- .../reorder_elementwise_unary.cc | 18 ++++++---- .../reorder_reshape_transpose.cc | 24 +++++++------ .../resolve_batch_normalization.cc | 12 ++++--- .../resolve_batch_to_space_nd_attributes.cc | 21 ++++++----- .../resolve_constant_binary.cc | 16 +++++---- .../resolve_constant_concatenation.cc | 24 ++++++++----- .../resolve_constant_fake_quant.cc | 16 +++++---- .../resolve_constant_fill.cc | 26 ++++++++------ .../resolve_constant_gather.cc | 20 ++++++----- .../resolve_constant_pack.cc | 16 +++++---- .../resolve_constant_random_uniform.cc | 18 ++++++---- .../resolve_constant_range.cc | 20 ++++++----- .../resolve_constant_reshape.cc | 20 ++++++----- .../resolve_constant_select.cc | 21 ++++++----- .../resolve_constant_shape_or_rank.cc | 16 +++++---- .../resolve_constant_slice.cc | 28 ++++++++------- .../resolve_constant_strided_slice.cc | 20 ++++++----- .../resolve_constant_tile.cc | 16 +++++---- .../resolve_constant_transpose.cc | 18 ++++++---- .../resolve_constant_unary.cc | 28 ++++++++------- .../resolve_fake_quant_args_from_vars.cc | 14 +++++--- .../resolve_gather_attributes.cc | 20 +++++++---- .../resolve_multiply_by_zero.cc | 30 +++++++++------- .../resolve_pad_attributes.cc | 17 +++++---- .../resolve_padv2_attributes.cc | 17 +++++---- .../resolve_reduce_attributes.cc | 
30 +++++++++++----- .../resolve_reorder_axes.cc | 13 ++++--- .../resolve_reshape_attributes.cc | 14 +++++--- .../resolve_slice_attributes.cc | 22 +++++++----- .../resolve_space_to_batch_nd_attributes.cc | 21 ++++++----- .../resolve_squeeze_attributes.cc | 12 ++++--- .../resolve_strided_slice_attributes.cc | 32 ++++++++++------- .../resolve_tensorflow_concat.cc | 12 ++++--- .../resolve_tensorflow_matmul.cc | 12 ++++--- .../resolve_tensorflow_merge.cc | 12 ++++--- .../resolve_tensorflow_switch.cc | 12 ++++--- .../resolve_transpose_attributes.cc | 18 ++++++---- .../shuffle_fc_weights.cc | 27 +++++++------- .../resolve_constant_concatenation_test.cc | 15 ++++++-- .../tests/resolve_constant_unary_test.cc | 3 +- .../unfuse_activation_functions.cc | 12 ++++--- .../unpartition_embedding_lookup.cc | 24 +++++++------ .../unroll_batch_matmul.cc | 15 +++++--- 94 files changed, 1003 insertions(+), 617 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc index 310a88484c..8a945ac435 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc @@ -25,10 +25,13 @@ limitations under the License. 
namespace toco { -bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertExpandDimsToReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto expand_it = model->operators.begin() + op_index; if (expand_it->get()->type != OperatorType::kExpandDims) { - return false; + return ::tensorflow::Status::OK(); } ExpandDimsOperator* expand_op = static_cast(expand_it->get()); @@ -38,18 +41,18 @@ bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) { const auto& input_array = model->GetArray(expand_op->inputs[0]); if (!input_array.has_shape()) { // Yield until input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } const auto& axis_array = model->GetArray(expand_op->inputs[1]); if (!axis_array.has_shape()) { // Yield until input axis array shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(RequiredBufferSizeForShape(axis_array.shape()), 1); if (!axis_array.buffer) { // Yield until the input axis array is constant - return false; + return ::tensorflow::Status::OK(); } int axis = axis_array.GetBuffer().data[0]; std::vector reshape_dims(input_array.shape().dims()); @@ -90,7 +93,8 @@ bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(expand_it->get(), expand_op); model->operators.erase(expand_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc index e88839be5d..a151012891 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc @@ -24,29 +24,32 @@ limitations under the License. 
namespace toco { -bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertPureConvToDepthwise::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto conv_it = model->operators.begin() + op_index; if (conv_it->get()->type != OperatorType::kConv) { - return false; + return ::tensorflow::Status::OK(); } const auto* conv_op = static_cast(conv_it->get()); if (conv_op->stride_width != conv_op->stride_height) { - return false; + return ::tensorflow::Status::OK(); } if ((conv_op->dilation_width_factor != 1) || (conv_op->dilation_height_factor != 1)) { // Depthwise conv does not support dilation - return false; + return ::tensorflow::Status::OK(); } auto& input_array = model->GetArray(conv_op->inputs[0]); if (!input_array.has_shape()) { // Shapes not propagated yet - return false; + return ::tensorflow::Status::OK(); } if (input_array.shape().dims(3) != 1) { // Not a pure convolution: Conv does accumulation across the depth // dimension. - return false; + return ::tensorflow::Status::OK(); } const auto& weights_name = conv_op->inputs[1]; @@ -56,15 +59,15 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) { "Not changing %s to DepthwiseConv because the weights is consumed by " "another op.", LogName(*conv_op)); - return false; + return ::tensorflow::Status::OK(); } auto& weights_array = model->GetArray(weights_name); if (!weights_array.buffer) { // Yield until the weights are resolved as a constant array. - return false; + return ::tensorflow::Status::OK(); } if (weights_array.data_type != ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } // At this point we know we have a pure conv. Rewrite it as DepthwiseConv. 
AddMessageF( @@ -112,7 +115,8 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) { } *weights_array.mutable_shape()->mutable_dims() = {1, width, height, depth}; weights_buffer.data = depthwise_conv_weights_data; - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc index 0d274fc687..4a264e1cf1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc @@ -86,9 +86,12 @@ TransposeOperator* CreateTransposeFromReorderAxes( // Converts ReorderAxes into Transpose and Reshape which are compatible with the // TFLite interpreter. -bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertReorderAxes::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto reorder_it = model->operators.begin() + op_index; - if (reorder_it->get()->type != OperatorType::kReorderAxes) return false; + if (reorder_it->get()->type != OperatorType::kReorderAxes) + return ::tensorflow::Status::OK(); auto* reorder_op = static_cast(reorder_it->get()); CHECK_EQ(reorder_op->inputs.size(), 1); @@ -113,8 +116,9 @@ bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) { // Yield if input array contains constants or if output array size has not // been adjusted to reflect the permutations in ReorderAxes. ReorderAxes will // be merged into a constant array when possible. 
- if (IsConstantParameterArray(*model, constant_input_array_name)) return false; - if (!output_array.has_shape()) return false; + if (IsConstantParameterArray(*model, constant_input_array_name)) + return ::tensorflow::Status::OK(); + if (!output_array.has_shape()) return ::tensorflow::Status::OK(); const auto input_axes_order = reorder_op->input_axes_order; const auto output_axes_order = reorder_op->output_axes_order; @@ -143,7 +147,8 @@ bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) { CHECK_EQ(reorder_it->get(), reorder_op); model->operators.erase(reorder_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc index 81cedb5dad..a0bd1ed4a4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc @@ -30,10 +30,13 @@ namespace toco { // means that the data layout will never change with this op, just the shape. // By converting these to reshapes once we have run shape propagation we allow // standard reshape optimization transforms to do their magic. 
-bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertSqueezeToReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto squeeze_it = model->operators.begin() + op_index; if (squeeze_it->get()->type != OperatorType::kSqueeze) { - return false; + return ::tensorflow::Status::OK(); } auto squeeze_op = static_cast(squeeze_it->get()); CHECK_EQ(squeeze_op->inputs.size(), 1); @@ -42,16 +45,16 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { const auto& input_array = model->GetArray(squeeze_op->inputs[0]); if (!input_array.has_shape()) { // Yield until input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } if (input_array.shape().dimensions_count() == 0) { // Input array cannot be 0-D. - return false; + return ::tensorflow::Status::OK(); } if (!model->HasArray(squeeze_op->outputs[0]) || !model->GetArray(squeeze_op->outputs[0]).has_shape()) { // Yield until shape propagation has set the output shape for us. - return false; + return ::tensorflow::Status::OK(); } // We use the output shape that has been calculated by shape propagation. @@ -59,7 +62,7 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { // Empty shapes will not work as empty data arrays. 
if (output_shape.dimensions_count() == 0) { - return false; + return ::tensorflow::Status::OK(); } auto* reshape_op = new TensorFlowReshapeOperator; @@ -79,7 +82,8 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(squeeze_it->get(), squeeze_op); model->operators.erase(squeeze_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc index dcaaddbf3b..d7cacf77f4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc @@ -20,10 +20,13 @@ namespace toco { // This pass will convert an AddN operator with only 2 inputs into a regular Add // operator, to which more optimizations may apply. -bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertTrivialAddNToAdd::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto addn_it = model->operators.begin() + op_index; if (addn_it->get()->type != OperatorType::kAddN) { - return false; + return ::tensorflow::Status::OK(); } AddNOperator* addn_op = static_cast(addn_it->get()); CHECK_GE(addn_op->inputs.size(), 2); @@ -31,7 +34,7 @@ bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) { // We only reduce AddN with N=2 to a regular Add. if (addn_op->inputs.size() != 2) { - return false; + return ::tensorflow::Status::OK(); } // Copy inputs & outputs to regular Add. 
@@ -45,7 +48,8 @@ bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) { addn_it = add_it + 1; CHECK_EQ(addn_it->get(), addn_op); model->operators.erase(addn_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc index 75113a2a8c..78779243a9 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc @@ -25,27 +25,30 @@ limitations under the License. namespace toco { -bool ConvertTrivialPackToReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertTrivialPackToReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto pack_it = model->operators.begin() + op_index; if (pack_it->get()->type != OperatorType::kPack) { - return false; + return ::tensorflow::Status::OK(); } auto* pack_op = static_cast(pack_it->get()); if (pack_op->inputs.size() > 1) { // Not trivial. - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(pack_op->outputs.size(), 1); const auto& input_array = model->GetArray(pack_op->inputs[0]); if (!input_array.has_shape()) { // Yield until input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } if (input_array.shape().dimensions_count() == 0) { // Input array cannot be 0-D. // (Unsure if this is TF behavior, but was required to get a test to pass.) 
- return false; + return ::tensorflow::Status::OK(); } AddMessageF("Converting trivial %s to a reshape", LogName(*pack_op)); @@ -75,7 +78,8 @@ bool ConvertTrivialPackToReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(pack_it->get(), pack_op); model->operators.erase(pack_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc index b689be0792..b6d712ca44 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc @@ -21,10 +21,13 @@ limitations under the License. namespace toco { -bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertTrivialTileToConcat::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto tile_it = model->operators.begin() + op_index; if (tile_it->get()->type != OperatorType::kTile) { - return false; + return ::tensorflow::Status::OK(); } auto* tile_op = static_cast(tile_it->get()); @@ -34,13 +37,13 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) { if (!input_array.has_shape() || !multiples_array.has_shape() || !output_array.has_shape()) { // Yield until PropagateFixedSizes has been run on this op. - return false; + return ::tensorflow::Status::OK(); } // Note: We can assume we have error checked inputs in PropagateFixedSizes. if (!multiples_array.buffer) { // Yield until the multiples is constant. - return false; + return ::tensorflow::Status::OK(); } std::vector const& multiples = multiples_array.GetBuffer().data; @@ -59,7 +62,7 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) { // The tile is non-trivial. Good luck. 
AddMessageF("Tile %s is non-trivial (has more than one multiply dimension)", LogName(*tile_op)); - return false; + return ::tensorflow::Status::OK(); } // The tile is like a concat. @@ -88,7 +91,8 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) { CHECK_EQ(tile_it->get(), tile_op); model->operators.erase(tile_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc index 5a36a90b38..e5a96d4335 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc @@ -48,10 +48,13 @@ bool TransposeAffectsMemoryOrder(std::vector perm, } // namespace -bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertTrivialTransposeToReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto transpose_it = model->operators.begin() + op_index; if (transpose_it->get()->type != OperatorType::kTranspose) { - return false; + return ::tensorflow::Status::OK(); } TransposeOperator* transpose_op = static_cast(transpose_it->get()); @@ -60,14 +63,14 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { const auto& output_array = model->GetArray(transpose_op->outputs[0]); if (!input_array.has_shape() || !output_array.has_shape()) { // Yield until PropagateFixedSizes has been run on this op. - return false; + return ::tensorflow::Status::OK(); } // Note: We can assume we have error checked inputs in PropagateFixedSizes. // Check that the permutation has propogated. 
std::vector const& perm = transpose_op->perm; if (perm.empty()) { - return false; + return ::tensorflow::Status::OK(); } // This transpose is trivial if non-unitary dimensions remain in the same @@ -76,7 +79,7 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { std::vector const& output_dims = output_array.shape().dims(); if (TransposeAffectsMemoryOrder(perm, input_dims)) { - return false; + return ::tensorflow::Status::OK(); } // This transpose is trivial. Replace it with a Reshape op. @@ -109,7 +112,8 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(transpose_it->get(), transpose_op); model->operators.erase(transpose_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc index 1e68cd678b..ebc0e9afca 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc @@ -73,18 +73,22 @@ bool ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) { return true; } -bool CreateIm2colArrays::Run(Model* model, std::size_t op_index) { +::tensorflow::Status CreateIm2colArrays::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); switch (op->type) { case OperatorType::kConv: - return ProcessConvOperator(model, static_cast(op)); + *modified = ProcessConvOperator(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kTransposeConv: - return ProcessTransposeConvOperator( + *modified = ProcessTransposeConvOperator( model, static_cast(op)); + return ::tensorflow::Status::OK(); default: - return false; + return ::tensorflow::Status::OK(); } } diff --git 
a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc index 1688586733..2119174950 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc @@ -186,24 +186,27 @@ bool DequantizeArray(const string& array_name, } // namespace -bool Dequantize::Run(Model* model, std::size_t op_index) { +::tensorflow::Status Dequantize::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; auto* op = op_it->get(); if (op->type == OperatorType::kDequantize) { auto& input_array = model->GetArray(op->inputs[0]); if (input_array.data_type == ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } if (input_array.final_data_type != ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } input_array.data_type = ArrayDataType::kFloat; input_array.quantization_params = nullptr; auto& output_array = model->GetArray(op->outputs[0]); output_array.data_type = ArrayDataType::kFloat; output_array.quantization_params = nullptr; - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } std::vector arrays; @@ -220,7 +223,8 @@ bool Dequantize::Run(Model* model, std::size_t op_index) { } } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc index 95558ef5ec..1555cf60a1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc @@ -25,21 +25,23 @@ limitations under the License. 
namespace toco { -bool DropFakeQuant::Run(Model* model, std::size_t op_index) { +::tensorflow::Status DropFakeQuant::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto fakequant_it = model->operators.begin() + op_index; auto* fakequant_base_op = fakequant_it->get(); if (fakequant_base_op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fakequant_op = static_cast(fakequant_base_op); if (!fakequant_op->minmax) { - return false; + return ::tensorflow::Status::OK(); } const auto& output_array = model->GetArray(fakequant_op->outputs[0]); if (!output_array.minmax) { - return false; + return ::tensorflow::Status::OK(); } // Drop min/max inputs @@ -50,7 +52,8 @@ bool DropFakeQuant::Run(Model* model, std::size_t op_index) { } fakequant_op->inputs.resize(1); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc index f7fd878b7e..7d66ea5dd2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc @@ -19,15 +19,17 @@ limitations under the License. namespace toco { -bool DropIm2colArrays::Run(Model* model, std::size_t op_index) { +::tensorflow::Status DropIm2colArrays::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto conv_it = model->operators.begin() + op_index; if (conv_it->get()->type != OperatorType::kConv) { - return false; + return ::tensorflow::Status::OK(); } auto* conv_op = static_cast(conv_it->get()); if (conv_op->outputs.size() < 2) { // Conv op does not have im2col. - return false; + return ::tensorflow::Status::OK(); } // Drop the im2col array. 
@@ -36,7 +38,8 @@ bool DropIm2colArrays::Run(Model* model, std::size_t op_index) { conv_op->outputs.resize(1); AddMessageF("Dropped an im2col array for %s", LogName(*conv_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc index e80ed036b3..72b1dda3be 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc @@ -62,17 +62,20 @@ bool ProcessLinearOperator(Model* model, Operator* op) { } } // namespace -bool EnsureBiasVectors::Run(Model* model, std::size_t op_index) { +::tensorflow::Status EnsureBiasVectors::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto* op = model->operators[op_index].get(); if (op->type == OperatorType::kConv || op->type == OperatorType::kDepthwiseConv || op->type == OperatorType::kFullyConnected) { if (ProcessLinearOperator(model, op)) { AddMessageF("Added bias vector to %s as %s", LogName(*op), op->inputs[2]); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc index c13fc0de75..60dcd52684 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc @@ -108,8 +108,9 @@ namespace toco { // we can foresee these 'fast int8 kernels' to remain important to have into // the 2020s. 
// -bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status EnsureUint8WeightsSafeForFastInt8Kernels::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; const auto& op = *model->operators[op_index]; int weights_index = 0; switch (op.type) { @@ -148,16 +149,16 @@ bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model, // That's why at the moment we only handle operators that use a GEMM // (Conv, fully-connected --- note that LSTM merely wraps a // fully-connected operator). - return false; + return ::tensorflow::Status::OK(); } const string& name = op.inputs[weights_index]; auto& array = model->GetArray(name); if (!array.buffer) { - return false; + return ::tensorflow::Status::OK(); } if (array.data_type != ArrayDataType::kUint8) { - return false; + return ::tensorflow::Status::OK(); } auto& buffer_data = array.GetMutableBuffer().data; @@ -212,7 +213,8 @@ bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model, AddMessageF("Tweaked weights values for %s", LogName(op)); } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc index c5ce3fcd95..88511a7d3c 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc @@ -25,27 +25,30 @@ limitations under the License. 
namespace toco { -bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { +::tensorflow::Status FuseActivationFunctions::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto ac_it = model->operators.begin() + op_index; const auto* ac_op = ac_it->get(); if (ac_op->type != OperatorType::kRelu6 && ac_op->type != OperatorType::kRelu1 && ac_op->type != OperatorType::kRelu) { - return false; + return ::tensorflow::Status::OK(); } // Find the op producing the array passed to this activation function Operator* op = GetOpWithOutput(*model, ac_op->inputs[0]); - if (!op) return false; + if (!op) return ::tensorflow::Status::OK(); if (CountTrueOutputs(*model, *op) > 1) { AddMessageF( "Not fusing activation function %s into %s because it has more than " "one consumed output", LogName(*ac_op), LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->outputs[0], ac_op->inputs[0]); @@ -57,7 +60,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { "Not fusing activation function into %s because it is consumed by more " "than 1 other operator", LogName(*ac_op), LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } if (!IsDiscardableArray(*model, op->outputs[0])) { @@ -65,7 +68,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { "Not fusing activation function %s into %s because output %s it is not " "discardable", LogName(*ac_op), LogName(*op), op->outputs[0]); - return false; + return ::tensorflow::Status::OK(); } if (op->fused_activation_function != FusedActivationFunctionType::kNone) { @@ -73,7 +76,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { "Not fusing activation function %s into %s because it already has a " "fused activation function", LogName(*ac_op), LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } if (!OperatorSupportsFusedActivation(op->type)) { @@ -81,7 +84,7 @@ bool 
FuseActivationFunctions::Run(Model* model, std::size_t op_index) { "Not fusing activation function %s because the %s op doesn't support " "it", LogName(*ac_op), LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Fusing activation function %s into the preceding %s", @@ -98,7 +101,8 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { model->EraseArray(ac_op->inputs[0]); op->outputs[0] = ac_op->outputs[0]; model->operators.erase(ac_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc index dcbbead517..0de22b8ff4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc @@ -150,14 +150,17 @@ void FuseMulOrDivParamsIntoFollowingAffine(Model* model, Operator* following_op, } // namespace -bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { +::tensorflow::Status FuseBinaryIntoFollowingAffine::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; auto* binary_op = binary_it->get(); if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && binary_op->type != OperatorType::kSub && binary_op->type != OperatorType::kDiv) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(binary_op->inputs.size(), 2); @@ -175,12 +178,12 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { }; if (!is_input_constant[0] && !is_input_constant[1]) { // Neither input is constant, so nothing we can fuse into a constant. 
- return false; + return ::tensorflow::Status::OK(); } if (is_input_constant[0] && is_input_constant[1]) { // Both inputs are constants. That's a job for constants // propagation, not for us to handle here. - return false; + return ::tensorflow::Status::OK(); } const int index_of_constant_input = is_input_constant[0] ? 0 : 1; const int index_of_variable_input = is_input_constant[0] ? 1 : 0; @@ -192,7 +195,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { if (index_of_constant_input != 1) { AddMessageF("Not fusing %s because the denominator is not constant", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } } @@ -204,7 +207,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s into the following affine op, because we only know " "how to do so when the constant operand is a scalar", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } } @@ -212,7 +215,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { FusedActivationFunctionType::kNone) { AddMessageF("Not fusing %s because it has a fused activation function", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } Operator* following_op = GetOpWithInput(*model, binary_op->outputs[0]); @@ -221,7 +224,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { AddMessageF( "Not fusing %s because it is not consumed by exactly one other op", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } if (following_op->type != OperatorType::kConv && @@ -231,14 +234,14 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the following %s is not of one of the supported " "types", LogName(*binary_op), LogName(*following_op)); - return false; + return ::tensorflow::Status::OK(); } if (following_op->inputs.size() < 3) { AddMessageF( "Not fusing %s because the 
following %s does not have a bias vector", LogName(*following_op), LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } const auto& weights = model->GetArray(following_op->inputs[1]); @@ -248,7 +251,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the following %s has non-constant weights or " "bias arrays", LogName(*binary_op), LogName(*following_op)); - return false; + return ::tensorflow::Status::OK(); } // Try to fuse the binary params into the following op's params @@ -260,7 +263,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { AddMessageF( "Not fusing %s because the following %s does not use VALID padding", LogName(*binary_op), LogName(*following_op)); - return false; + return ::tensorflow::Status::OK(); } } if (following_op->type == OperatorType::kDepthwiseConv) { @@ -269,7 +272,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { AddMessageF( "Not fusing %s because the following %s does not use VALID padding", LogName(*binary_op), LogName(*following_op)); - return false; + return ::tensorflow::Status::OK(); } } FuseAddOrSubParamsIntoFollowingAffine(model, following_op, binary_op, @@ -294,7 +297,8 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { model->EraseArray(old_constant_param_name); } model->operators.erase(binary_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc index b324631579..b8da756d85 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc @@ -188,14 +188,17 @@ void 
FuseMulOrDivParamsIntoPrecedingAffine(Model* model, Operator* preceding_op, } } // namespace -bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { +::tensorflow::Status FuseBinaryIntoPrecedingAffine::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; const auto* binary_op = binary_it->get(); if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && binary_op->type != OperatorType::kSub && binary_op->type != OperatorType::kDiv) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(binary_op->inputs.size(), 2); @@ -213,12 +216,12 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { }; if (!is_input_constant[0] && !is_input_constant[1]) { // Neither input is constant, so nothing we can fuse into a constant. - return false; + return ::tensorflow::Status::OK(); } if (is_input_constant[0] && is_input_constant[1]) { // Both inputs are constants. That's a job for constants // propagation, not for us to handle here. - return false; + return ::tensorflow::Status::OK(); } const int index_of_constant_input = is_input_constant[0] ? 0 : 1; const int index_of_variable_input = is_input_constant[0] ? 
1 : 0; @@ -230,7 +233,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { if (index_of_constant_input != 1) { AddMessageF("Not fusing %s because the denominator is not constant", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } } @@ -239,12 +242,12 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { if (!preceding_op) { AddMessageF("Not fusing %s because it is not the output of another op", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } for (const string& output_array : model->flags.output_arrays()) { if (preceding_op->outputs[0] == output_array) { - return false; + return ::tensorflow::Status::OK(); } } @@ -255,7 +258,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the preceding %s is not of one of the supported " "types", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } if (preceding_op->fused_activation_function != @@ -264,14 +267,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the preceding %s has a fused activation " "function", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } if (preceding_op->inputs.size() < 3) { AddMessageF( "Not fusing %s because the preceding %s does not have a bias vector", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } const auto& weights_name = preceding_op->inputs[1]; @@ -289,14 +292,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the preceding %s has a non-constant bias " "array", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } if (count_ops_consuming_bias > 1) { AddMessageF( "Not fusing %s because the bias of the preceding %s is consumed 
by " "another op", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } } else { if (!weights.buffer || !bias.buffer) { @@ -304,14 +307,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the preceding %s has non-constant weights or " "bias arrays", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } if (count_ops_consuming_weights > 1 || count_ops_consuming_bias > 1) { AddMessageF( "Not fusing %s because the weights or bias of the preceding %s is " "consumed by another op", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } } @@ -323,7 +326,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the output of the preceding %s is consumed by " "another op", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Fusing %s into the preceding %s", LogName(*binary_op), @@ -352,7 +355,8 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { model->EraseArray(old_constant_param_name); } model->operators.erase(binary_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc index 874d8def57..4848867b9a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc @@ -51,19 +51,22 @@ bool IsBroadcastingOp(const Model& model, Operator* op) { // Finds an operation that looks like a broadcast (concat of the same sources // along the last dimension) and drops it by relying on 
the ability of certain // binary ops to perform an implicit broadcast. -bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) { +::tensorflow::Status FuseBroadcastIntoFollowingBinary::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; auto* binary_op = binary_it->get(); // Test for binary ops of types that we know how to resolve if (binary_op->inputs.size() != 2) { - return false; + return ::tensorflow::Status::OK(); } if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && binary_op->type != OperatorType::kSub && binary_op->type != OperatorType::kDiv) { - return false; + return ::tensorflow::Status::OK(); } // NOTE: either of these ops may be nullptr if the input array is constant. @@ -78,14 +81,14 @@ bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) { if (!is_op_0_broadcast && !is_op_1_broadcast) { // Neither input is a broadcast-looking thing. AddMessageF("Neither input looks broadcasty"); - return false; + return ::tensorflow::Status::OK(); } else if (is_op_0_broadcast && is_op_1_broadcast) { AddMessageF( "Unable to fuse broadcast into %s as both inputs (%s, %s) are " "broadcasts", LogName(*binary_op), op[0] ? LogName(*op[0]) : "(?)", op[1] ? LogName(*op[1]) : "(?)"); - return false; + return ::tensorflow::Status::OK(); } int broadcast_index = is_op_0_broadcast ? 0 : 1; @@ -96,7 +99,8 @@ bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) { binary_op->inputs[broadcast_index] = op[broadcast_index]->inputs[0]; // We leave the broadcast op in; it'll get cleaned up if it's not used later. 
- return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc index 6961e23690..8b0bc2d865 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc @@ -142,7 +142,7 @@ bool GraphTransformationsPass(int increment, Model* model, for (const auto& transformation : transformations) { CHECK(!changed_now); CHECK(transformation->Messages().empty()); - changed_now = transformation->Run(model, op_index); + CHECK(transformation->Run(model, op_index, &changed_now).ok()); const char* made_a_change_msg = changed_now ? "made a change" : "did NOT make a change"; const int log_level = diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 4d213b3f9c..a89db320ea 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -27,7 +27,8 @@ namespace toco { class GraphTransformation { public: - virtual bool Run(Model* model, std::size_t op_index) = 0; + virtual ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) = 0; virtual const char* Name() const = 0; virtual ~GraphTransformation() {} // Returns the list of messages that this graph transformation @@ -104,11 +105,12 @@ class GraphTransformationsSet { void RunGraphTransformations(Model* model, const string& message, const GraphTransformationsSet& transformations); -#define DECLARE_GRAPH_TRANSFORMATION(GTName) \ - class GTName : public GraphTransformation { \ - public: \ - bool Run(Model* model, std::size_t op_index) override; \ - const char* Name() const override { return 
#GTName; } \ +#define DECLARE_GRAPH_TRANSFORMATION(GTName) \ + class GTName : public GraphTransformation { \ + public: \ + ::tensorflow::Status Run(Model* model, std::size_t op_index, \ + bool* modified) override; \ + const char* Name() const override { return #GTName; } \ }; // List of all graph transformations @@ -200,7 +202,8 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveGatherAttributes) class PropagateDefaultMinMax : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "PropagateDefaultMinMax"; } bool has_any_ranges_defined() const { return !type_ranges_.empty(); } @@ -218,7 +221,8 @@ class PropagateDefaultMinMax : public GraphTransformation { class RemoveTrivialReshape : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "RemoveTrivialReshape"; } bool treat_expand_dims_as_trivial() const { return treat_expand_dims_as_trivial_; @@ -233,7 +237,8 @@ class RemoveTrivialReshape : public GraphTransformation { class ResolveConstantFakeQuant : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "ResolveConstantFakeQuant"; } // True if the num_bits should adjust the final data type. 
@@ -250,7 +255,8 @@ class ResolveConstantFakeQuant : public GraphTransformation { class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "EnsureUint8WeightsSafeForFastInt8Kernels"; } @@ -267,7 +273,8 @@ class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation { class IdentifyDilatedConv : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "IdentifyDilatedConv"; } bool identify_depthwise_conv() const { return identify_depthwise_conv_; } void set_identify_depthwise_conv(bool val) { identify_depthwise_conv_ = val; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc index 3114fa93e8..72df53548b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc @@ -372,7 +372,9 @@ bool HardcodeMinMaxForLstmCell(Model* model, Operator* op) { } } // namespace -bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { +::tensorflow::Status HardcodeMinMax::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); bool changed = false; @@ -467,7 +469,8 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { if (changed) { AddMessageF("Hardcoded min-max through %s", LogName(*op)); } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git 
a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc index aac77eb39e..9e4a3005a1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc @@ -168,7 +168,10 @@ bool ResolveDilatedConv(Model* model, Operator* conv_base_op, Operator* stb_op, return true; } -bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyDilatedConv::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* stb_op = it->get(); @@ -176,17 +179,17 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) { // *************************************************************************** // SpaceToBatch Op. if (stb_op->type != OperatorType::kSpaceToBatchND) { - return false; + return ::tensorflow::Status::OK(); } if (stb_op->inputs.size() != 3) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(stb_op->outputs.size(), 1); // Extract the dilation factor from Input[1] of SpaceToBatch // TODO(mjmatthews): Support 2D dilation factors. 
const auto& block_shape_array = model->GetArray(stb_op->inputs[1]); if (!block_shape_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(block_shape_array.shape().dimensions_count(), 1); int dilation_factor = @@ -195,7 +198,7 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) { // Expand Op auto* post_stb_op = GetOpWithInput(*model, stb_op->outputs[0]); if (!post_stb_op) { - return false; + return ::tensorflow::Status::OK(); } bool has_expand_op = false; if (post_stb_op->type == OperatorType::kExpandDims) { @@ -229,7 +232,8 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) { } } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc index b78efd7fc3..78f60f52fb 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc @@ -39,7 +39,10 @@ std::vector>::iterator FindOperator( } } // namespace -bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyL2Normalization::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto div_it = model->operators.begin() + op_index; const auto* div_or_mul_op = div_it->get(); OperatorType expected_op_type_producing_div_or_mul_input; @@ -48,7 +51,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { } else if (div_or_mul_op->type == OperatorType::kMul) { expected_op_type_producing_div_or_mul_input = OperatorType::kRsqrt; } else { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(div_or_mul_op->inputs.size(), 2); Operator* op_producing_div_or_mul_input[2] = { @@ -58,14 +61,14 @@ bool IdentifyL2Normalization::Run(Model* 
model, std::size_t op_index) { if (!op_producing_div_or_mul_input[1] || op_producing_div_or_mul_input[1]->type != expected_op_type_producing_div_or_mul_input) { - return false; + return ::tensorflow::Status::OK(); } Operator* sqrt_or_rsqrt_op = op_producing_div_or_mul_input[1]; CHECK_EQ(sqrt_or_rsqrt_op->inputs.size(), 1); Operator* op_producing_sqrt_or_rsqrt_input = GetOpWithOutput(*model, sqrt_or_rsqrt_op->inputs[0]); if (!op_producing_sqrt_or_rsqrt_input) { - return false; + return ::tensorflow::Status::OK(); } // There may be an Add or a Maximum here, adding or clamping to a "small" @@ -105,7 +108,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { " because the operator producing the input to the square root, %s," ", does not match the expected pattern", LogName(*op_producing_sqrt_or_rsqrt_input)); - return false; + return ::tensorflow::Status::OK(); } } @@ -116,7 +119,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Normalization subgraph: " "expected Sum op, got %s", LogName(*sum_op)); - return false; + return ::tensorflow::Status::OK(); } Operator* square_op = GetOpWithOutput(*model, sum_op->inputs[0]); @@ -125,7 +128,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Normalization subgraph: " "expected Square op, got %s", LogName(*square_op)); - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(square_op->inputs.size(), 1); @@ -135,7 +138,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Normalization subgraph: %s does not " "take the same input as the Mul/Div node", LogName(*square_op)); - return false; + return ::tensorflow::Status::OK(); } // Create and emplace the new L2Normalization @@ -162,7 +165,8 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { model->operators.erase(FindOperator(model, sqrt_or_rsqrt_op)); 
model->EraseArray(div_or_mul_op->inputs[1]); model->operators.erase(FindOperator(model, div_or_mul_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc index 705e73779b..13664bb344 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc @@ -38,11 +38,13 @@ std::vector>::iterator FindOperator( } } // namespace -bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyL2Pool::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto sqrt_it = model->operators.begin() + op_index; const auto* sqrt_op = sqrt_it->get(); if (sqrt_op->type != OperatorType::kSqrt) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(sqrt_op->inputs.size(), 1); @@ -56,7 +58,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { AddMessageF( "Giving up trying to identify L2Pool subgraph: " "expected AveragePool op, but Sqrt op has no preceding op"); - return false; + return ::tensorflow::Status::OK(); } if (prev_to_sqrt_op->type != OperatorType::kAveragePool) { @@ -64,7 +66,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Pool subgraph: " "expected AveragePool op, got %s", LogName(*prev_to_sqrt_op)); - return false; + return ::tensorflow::Status::OK(); } avpool_op = static_cast(prev_to_sqrt_op); @@ -77,7 +79,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Pool subgraph: " "expected Square op, got %s", LogName(*square_op)); - return false; + return ::tensorflow::Status::OK(); } // Create and emplace L2Pool node. 
@@ -107,7 +109,8 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { model->operators.erase(FindOperator(model, avpool_op)); model->operators.erase(FindOperator(model, sqrt_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc index c0b014b45e..7fd8f906e2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc @@ -132,7 +132,9 @@ bool MatchOperatorInputs(const Operator& op, const Model& model, } // namespace -bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyLstmCell::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; // This LSTM cell identification method is not invariant to commutation of // commutative operator inputs. 
For example, if input[0] and input[1] of the // final output multiplication were swapped, this method would not identify it @@ -143,13 +145,13 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { auto op_it = model->operators.begin() + op_index; Operator* final_output_mul = op_it->get(); if (final_output_mul->type != OperatorType::kMul) { - return false; + return ::tensorflow::Status::OK(); } Operator *state_output_tanh, *fc_output_sig; if (!MatchOperatorInputs(*final_output_mul, *model, OperatorType::kTanh, &state_output_tanh, OperatorType::kLogistic, &fc_output_sig)) { - return false; + return ::tensorflow::Status::OK(); } // State output TanH @@ -158,7 +160,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { Operator* state_combine_add; if (!MatchOperatorInputs(*state_output_tanh, *model, OperatorType::kAdd, &state_combine_add)) { - return false; + return ::tensorflow::Status::OK(); } // State forget & remember addition @@ -166,7 +168,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { if (!MatchOperatorInputs(*state_combine_add, *model, OperatorType::kMul, &state_forget_mul, OperatorType::kMul, &state_remember_mul)) { - return false; + return ::tensorflow::Status::OK(); } const string prev_state = state_forget_mul->inputs[0]; @@ -175,7 +177,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { if (!MatchOperatorInputs(*state_forget_mul, *model, OperatorType::kNone, nullptr, OperatorType::kLogistic, &state_forget_sig)) { - return false; + return ::tensorflow::Status::OK(); } // State remember gate @@ -183,40 +185,40 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { if (!MatchOperatorInputs(*state_remember_mul, *model, OperatorType::kLogistic, &state_remember_sig, OperatorType::kTanh, &state_info_tanh)) { - return false; + return ::tensorflow::Status::OK(); } // State remember "information" activation function Operator* fc_output_split; if (!MatchOperatorInputs(*state_info_tanh, 
*model, OperatorType::kSplit, &fc_output_split)) { - return false; + return ::tensorflow::Status::OK(); } // State remember gate activation function Operator* tmp; if (!MatchOperatorInputs(*state_remember_sig, *model, OperatorType::kSplit, &tmp) || (tmp != fc_output_split)) { - return false; + return ::tensorflow::Status::OK(); } // State forget gate activation function if (!MatchOperatorInputs(*state_forget_sig, *model, OperatorType::kSplit, &tmp) || (tmp != fc_output_split)) { - return false; + return ::tensorflow::Status::OK(); } // Fully connected output activation function if (!MatchOperatorInputs(*fc_output_sig, *model, OperatorType::kSplit, &tmp) || (tmp != fc_output_split)) { - return false; + return ::tensorflow::Status::OK(); } // Fully connected output split Operator* fully_connected; if (!MatchOperatorInputs(*fc_output_split, *model, OperatorType::kNone, nullptr, OperatorType::kFullyConnected, &fully_connected)) { - return false; + return ::tensorflow::Status::OK(); } // Fully connected op @@ -225,13 +227,13 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { OperatorType::kConcatenation, &concat_inputs, OperatorType::kNone, nullptr, OperatorType::kNone, nullptr)) { - return false; + return ::tensorflow::Status::OK(); } if (static_cast(fully_connected)->weights_format != FullyConnectedWeightsFormat::kDefault) { // Not yet implemented: experimental shuffled weights in fused LSTM cell. 
- return false; + return ::tensorflow::Status::OK(); } // Emplace a new LSTM cell operator @@ -300,7 +302,8 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { model->operators.erase(FindOperator(model, *fully_connected)); DeleteArrayIfUnused(concat_inputs->outputs[0], model); model->operators.erase(FindOperator(model, *concat_inputs)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc index 5b6a984ee1..6ccce923f3 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc @@ -25,19 +25,22 @@ limitations under the License. namespace toco { -bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) { +::tensorflow::Status MergeLstmCellInputs::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // Find lstm cell. auto op_it = model->operators.begin() + op_index; auto src_op = op_it->get(); if (src_op->type != OperatorType::kLstmCell) { - return false; + return ::tensorflow::Status::OK(); } // Already a compact LstmCell. Do not need to merge cell inputs. 
const auto* src_lstm_op = static_cast(src_op); if (src_lstm_op->kernel_type != LstmCellOperator::KERNEL_FULL || src_lstm_op->inputs.size() != kExtendedLstmInputCount) { - return false; + return ::tensorflow::Status::OK(); } // Identify prev_activ_input, prev_state_input as required Op inputs, @@ -45,12 +48,12 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) { string prev_activ_input; if (!GetMatchingRnnArray(model, src_op->outputs[kOutputTensor], &prev_activ_input)) { - return false; + return ::tensorflow::Status::OK(); } string prev_state_input; if (!GetMatchingRnnArray(model, src_op->outputs[kCellStateTensor], &prev_state_input)) { - return false; + return ::tensorflow::Status::OK(); } // Get LstmCell's cell, input, output size. @@ -184,7 +187,8 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) { DeleteArrayIfUnused(src_op->inputs[kOutputGateBiasTensor], model); model->operators.erase(FindOp(*model, src_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc index 46d1fce50e..ad5120e2aa 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc @@ -25,19 +25,22 @@ limitations under the License. namespace toco { -bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) { +::tensorflow::Status SplitLstmCellInputs::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // Find lstm cell. auto op_it = model->operators.begin() + op_index; auto curr_op = op_it->get(); if (curr_op->type != OperatorType::kLstmCell) { - return false; + return ::tensorflow::Status::OK(); } const auto* curr_lstm_op = static_cast(curr_op); // Already an extended LstmCell. 
Do not need to split cell inputs. if (curr_lstm_op->kernel_type != LstmCellOperator::KERNEL_BASIC || curr_lstm_op->inputs.size() != LstmCellOperator::NUM_INPUTS) { - return false; + return ::tensorflow::Status::OK(); } // Make sure the WEIGHTS_INPUT and BIASES_INPUT are constant arrays, @@ -46,13 +49,13 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) { *model, curr_op->inputs[LstmCellOperator::WEIGHTS_INPUT]) || !IsConstantParameterArray( *model, curr_op->inputs[LstmCellOperator::BIASES_INPUT])) { - return false; + return ::tensorflow::Status::OK(); } // Make sure propagate_fixed_sizes has defined the size of the output. if (!model->GetArray(curr_op->outputs[LstmCellOperator::ACTIV_OUTPUT]) .has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // Emplace a new LstmCell operator with extended inputs (kernel/lstm.cc). @@ -168,7 +171,8 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) { DeleteArrayIfUnused(curr_op->inputs[LstmCellOperator::BIASES_INPUT], model); model->operators.erase(FindOp(*model, curr_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc index b90a156a0d..c11fee4dc9 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc @@ -43,13 +43,15 @@ limitations under the License. 
namespace toco { -bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyPRelu::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto add_op_it = model->operators.begin() + op_index; const auto* add_op = add_op_it->get(); if (add_op == nullptr || add_op->type != OperatorType::kAdd || add_op->inputs.size() != 2 || add_op->fused_activation_function != FusedActivationFunctionType::kNone) { - return false; + return ::tensorflow::Status::OK(); } const auto* relu_input_op = GetOpWithOutput(*model, add_op->inputs[0]); @@ -57,7 +59,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { relu_input_op->inputs.size() != 1 || relu_input_op->fused_activation_function != FusedActivationFunctionType::kNone) { - return false; + return ::tensorflow::Status::OK(); } // TODO(ycling): Both Add and Mul are commutative. Support the case where @@ -66,7 +68,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { if (mul_op == nullptr || mul_op->type != OperatorType::kMul || mul_op->inputs.size() != 2 || mul_op->fused_activation_function != FusedActivationFunctionType::kNone) { - return false; + return ::tensorflow::Status::OK(); } const auto neg_alpha_tensor_name = mul_op->inputs[0]; @@ -75,7 +77,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { if (relu_neg_input_op == nullptr || relu_neg_input_op->inputs.size() != 1) { - return false; + return ::tensorflow::Status::OK(); } const Operator* final_input_op; @@ -92,13 +94,13 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { relu_neg_input_op->type != OperatorType::kRelu || relu_neg_input_op->fused_activation_function != FusedActivationFunctionType::kNone) { - return false; + return ::tensorflow::Status::OK(); } final_input_op = neg_input_op; } if (relu_input_op->inputs[0] != final_input_op->inputs[0]) { - return false; + return ::tensorflow::Status::OK(); } const auto input_tensor_name = relu_input_op->inputs[0]; 
@@ -128,7 +130,8 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { // intermediate tensors aren't used by other ops, those will be removed by // other graph transformation rules. model->operators.erase(FindOp(*model, add_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc index 94820a0166..51d0629362 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc @@ -56,13 +56,15 @@ int GetSingleScalarInputIndexOfBinaryOp(Model* model, const Operator* op, } } // namespace -bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyRelu1::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; // Follow sequences of min+max and max+min. First get the leading op. const auto op_it = model->operators.begin() + op_index; const auto* op_0 = op_it->get(); if (op_0->type != OperatorType::kMinimum && op_0->type != OperatorType::kMaximum) { - return false; + return ::tensorflow::Status::OK(); } // Get the paired op and ensure it's the counter to the first. @@ -71,17 +73,17 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { (op_1->type != OperatorType::kMinimum && op_1->type != OperatorType::kMaximum) || op_0->type == op_1->type) { - return false; + return ::tensorflow::Status::OK(); } const auto* min_op = op_0->type == OperatorType::kMinimum ? op_0 : op_1; const auto* max_op = op_0->type == OperatorType::kMaximum ? 
op_0 : op_1; if (min_op->inputs.size() != 2 || max_op->inputs.size() != 2) { - return false; + return ::tensorflow::Status::OK(); } if (min_op->outputs.size() != 1 || max_op->outputs.size() != 1) { - return false; + return ::tensorflow::Status::OK(); } // Get the original input to the min+max pair. @@ -90,7 +92,7 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { int max_scalar_input_index = GetSingleScalarInputIndexOfBinaryOp(model, max_op, -1.0f); if (min_scalar_input_index == -1 || max_scalar_input_index == -1) { - return false; + return ::tensorflow::Status::OK(); } int op_0_scalar_input_index = op_0 == min_op ? min_scalar_input_index : max_scalar_input_index; @@ -111,7 +113,8 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { model->operators.erase(FindOperator(model, op_0)); model->operators.erase(FindOperator(model, op_1)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc index f684de08ab..5bf17d5b4c 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc @@ -97,7 +97,10 @@ bool AddDequantizeOperatorToInput(const string& input_name, const Operator* op, return true; } -bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) { +::tensorflow::Status MakeInitialDequantizeOperator::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // This is effectively a transformation applied to edges. We iterate over the // specified node (op) and proceed for input edges. 
const auto it = model->operators.begin() + op_index; @@ -114,7 +117,8 @@ bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) { } } } - return change_made; + *modified = change_made; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc index 95bc7f7d4b..06de9b1cd8 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc @@ -102,18 +102,19 @@ std::vector ReshapeToTranspose(const Model& model, // to be merged if the reshape does not affect memory ordering and does not // affects the number of dimensions. This only occurs when only unary dimensions // are shifting position. -bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status MergeReshapeIntoPrecedingTranspose::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* reshape_op = ConvertOperator( it->get(), OperatorType::kReshape); if (reshape_op == nullptr) { - return false; + return ::tensorflow::Status::OK(); } if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) { - return false; + return ::tensorflow::Status::OK(); } const string intermediate_name = reshape_op->inputs[0]; @@ -121,13 +122,13 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, // Guarantee the input is only consume by the reshape. if (CountOpsWithInput(*model, intermediate_name) != 1) { - return false; + return ::tensorflow::Status::OK(); } // Check for the parent operator. 
const auto& transpose_it = FindOpWithOutput(*model, intermediate_name); if (transpose_it == model->operators.end()) { - return false; + return ::tensorflow::Status::OK(); } // Find the parent operator and guarantee it is a transpose. @@ -135,16 +136,16 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, transpose_it->get(), OperatorType::kTranspose); if (transpose_op == nullptr) { - return false; + return ::tensorflow::Status::OK(); } if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) { - return false; + return ::tensorflow::Status::OK(); } if (!ReshapeIsEquivalentToTranspose(*model, reshape_op, false /*allow_extra_unary_dimensions*/)) { - return false; + return ::tensorflow::Status::OK(); } // Check that the intermediate is not an output array. @@ -153,7 +154,7 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, "Cannot fuse %s and %s as it would invalidate the transpose " "output array.", LogName(*transpose_op), LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Merging operations %s and %s", LogName(*transpose_op), @@ -172,7 +173,7 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, // Remove the reshape as passthrough operation. if (!RemoveTrivialPassthroughOp(this, model, op_index)) { - return false; + return ::tensorflow::Status::OK(); } // Update transpose_op's constant buffer to contain the new permutation. @@ -184,7 +185,8 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, // transpose_ops's shape will likely has changed. 
model->GetArray(transpose_op->outputs[0]).clear_shape(); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc index 7f44c65285..f0d8d924ad 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc @@ -54,7 +54,10 @@ bool IsTailOfShape(const Shape& tail, const Shape& shape) { // // Note we are testing for one particular case of a broader set of possible // binary-reshape op transformations. This transformation could be generalized. -bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status MoveBinaryOperatorBeforeReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; Operator* binary_op = binary_it->get(); if (binary_op->type != OperatorType::kAdd && @@ -69,7 +72,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { binary_op->type != OperatorType::kLessEqual && binary_op->type != OperatorType::kGreater && binary_op->type != OperatorType::kGreaterEqual) { - return false; + return ::tensorflow::Status::OK(); } // BINARY OP INPUT CHECKS @@ -81,11 +84,11 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { if (!input_is_const[0] && !input_is_const[1]) { // To limit our scope, we require one constant input. Though there's no // reason this transformation wouldn't work with all variable inputs. - return false; + return ::tensorflow::Status::OK(); } if (input_is_const[0] && input_is_const[1]) { // Both inputs are constants. Leave this for constants propagation. 
- return false; + return ::tensorflow::Status::OK(); } const int constant_input_idx = input_is_const[0] ? 0 : 1; const int variable_input_idx = input_is_const[0] ? 1 : 0; @@ -98,13 +101,13 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { AddMessageF( "Not moving %s because it's non-constant input shape is not resolved.", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } if (!IsTailOfShape( model->GetArray(binary_op->inputs[constant_input_idx]).shape(), model->GetArray(binary_op->inputs[variable_input_idx]).shape())) { // Constant array shape must be the latter part of the variable shape. - return false; + return ::tensorflow::Status::OK(); } // RESHAPE OP CHECKS @@ -113,13 +116,13 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { if (reshape_it == model->operators.end()) { AddMessageF("Not moving %s because it's variable input is not connected.", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } Operator* reshape_op = reshape_it->get(); if (reshape_op->type != OperatorType::kReshape) { AddMessageF("Not moving %s because the preceding %s is not a reshape op", LogName(*binary_op), LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } const auto& reshape_input_array = model->GetArray(reshape_op->inputs[0]); if (!reshape_input_array.has_shape()) { @@ -127,14 +130,14 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { "Not moving %s because it's non-constant input shape is not resolved " "yet", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } if (!IsTailOfShape( model->GetArray(binary_op->inputs[constant_input_idx]).shape(), model->GetArray(reshape_op->outputs[0]).shape())) { // Constant array shape must be the latter part of the binary op output // shape. 
- return false; + return ::tensorflow::Status::OK(); } // EXTRA CHECKS ON CONNECTING ARRAY @@ -143,7 +146,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { AddMessageF( "Not moving %s because the output of reshape op %s is an output op.", LogName(*binary_op), LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } } int count_ops_consuming_output = @@ -154,7 +157,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { "Not moving %s because the output of reshape op %s is consumed by " "another op", LogName(*binary_op), LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } // SWAP ORDER OF BINARY AND RESHAPE OPS @@ -172,7 +175,8 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { // Clear binary output shape so it will be re-propagated model->GetArray(binary_op->outputs[0]).clear_shape(); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc index cf17c49b10..9c1ed2b732 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc @@ -26,20 +26,21 @@ limitations under the License. 
namespace toco { -bool PropagateActivationFunctionIntoConstants::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status PropagateActivationFunctionIntoConstants::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; const auto ac_it = model->operators.begin() + op_index; const auto* ac_op = ac_it->get(); if (ac_op->type != OperatorType::kRelu6 && ac_op->type != OperatorType::kRelu1 && ac_op->type != OperatorType::kRelu) { - return false; + return ::tensorflow::Status::OK(); } // Find the op producing the array passed to this activation function. auto* src_op = GetOpWithOutput(*model, ac_op->inputs[0]); if (!src_op) { - return false; + return ::tensorflow::Status::OK(); } // Ensure the src_op is not used without the activation function applied. @@ -57,7 +58,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model, src_op_input = src_op->inputs[0]; break; default: - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(src_op->outputs[0], ac_op->inputs[0]); @@ -69,7 +70,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model, "Not propagating activation function %s into %s:%s because it is not " "constant", LogName(*ac_op), LogName(*src_op), src_op_input); - return false; + return ::tensorflow::Status::OK(); } // Get the array we'll be working with and ensure it's a compatible type. 
@@ -79,7 +80,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model, "Not propagating activation function %s into %s:%s because it is " "non-float data", LogName(*ac_op), LogName(*src_op), src_op_input); - return false; + return ::tensorflow::Status::OK(); } auto& const_array_data = const_array.GetMutableBuffer().data; @@ -108,14 +109,15 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model, } default: LOG(FATAL) << "Unsupported activation function " << LogName(*ac_op); - return false; + return ::tensorflow::Status::OK(); } const_array_data[i] = new_value; } AddMessageF("Propagated activation function %s into %s:%s", LogName(*ac_op), LogName(*src_op), src_op_input); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc index 323eefcd3a..40cd6dea82 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc @@ -32,7 +32,10 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op, } } // namespace -bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status PropagateArrayDataTypes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); @@ -40,7 +43,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { for (const auto& input : op->inputs) { if (!model->IsOptionalArray(input) && model->GetArray(input).data_type == ArrayDataType::kNone) { - return false; + return ::tensorflow::Status::OK(); } } // Record data types of output before processing, so we can see at the @@ 
-131,7 +134,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { auto* rand_op = static_cast(op); // The output type of RandomUniform is specified with an attribute if (rand_op->dtype == ArrayDataType::kNone) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->outputs.size(), 1); SetDataTypeForAllOutputs(model, op, rand_op->dtype); @@ -153,7 +156,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { // This can make unsupported_op->output_data_types have more elements than // op->outputs. if (unsupported_op->output_data_types.size() < op->outputs.size()) { - return false; + return ::tensorflow::Status::OK(); } for (int i = 0; i < op->outputs.size(); ++i) { const string& output = op->outputs[i]; @@ -164,7 +167,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { } case OperatorType::kExpandDims: { // Yield on ExpandDim until it is converted to Reshape - return false; + return ::tensorflow::Status::OK(); } case OperatorType::kSelect: { // Select produces outputs with the same type as their 2nd input @@ -248,10 +251,11 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { // Return true if any output data type changed, false if none changed. 
for (const auto& output : op->outputs) { if (old_output_data_types[output] != model->GetArray(output).data_type) { - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc index cd078ef189..3cf191436d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc @@ -39,7 +39,10 @@ bool SupportsMinMax(const Array& array) { // When provided a set of min/max values for uint8 arrays this will rescale // the values for other data types as required and preserving the floating point // range within the new type. -bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) { +::tensorflow::Status PropagateDefaultMinMax::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* op = it->get(); @@ -61,7 +64,8 @@ bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) { } } - return did_change; + *modified = did_change; + return ::tensorflow::Status::OK(); } // Sets the min/max on the given array, adjusting the reference_minmax for the diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc index 3ad6b0ec6f..d0113237ce 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc @@ -277,11 +277,14 @@ bool RecursivelyForwardPropagateDataType(GraphTransformation* transformation, // nice logging and integration with the graphviz video dumping 
mode. // In general you should not copy this style of transformation and stick to // local-only changes as seen in the other transformations. -bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) { +::tensorflow::Status PropagateFakeQuantNumBits::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); if (op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fakequant_op = static_cast(op); @@ -290,7 +293,7 @@ bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) { &quantized_data_type)) { AddMessageF("FakeQuant op %s num_bits=%d is out of range, ignoring", LogName(*op), fakequant_op->num_bits); - return false; + return ::tensorflow::Status::OK(); } const auto& final_minmax = *fakequant_op->minmax; @@ -311,7 +314,8 @@ bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) { did_change |= RecursivelyForwardPropagateDataType(this, model, op, quantized_data_type); - return did_change; + *modified = did_change; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index d056a8add7..5496e2093e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1622,7 +1622,10 @@ void ProcessUnpackOperator(Model* model, UnpackOperator* op) { } // namespace -bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status PropagateFixedSizes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); std::unordered_map> old_output_dims; @@ -1836,7 +1839,7 @@ bool 
PropagateFixedSizes::Run(Model* model, std::size_t op_index) { static_cast(op); // Attribute can be not specified, ignore it. if (unsupported_op->output_shapes.size() < op->outputs.size()) { - return false; + return ::tensorflow::Status::OK(); } for (int i = 0; i < op->outputs.size(); ++i) { const string& output = op->outputs[i]; @@ -1886,10 +1889,11 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { (old_output_dims[output] != model->GetArray(output).shape().dims())) { AddMessageF("Set shape of %s to [%s]", output, absl::StrJoin(model->GetArray(output).shape().dims(), ",")); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index fb299c31b7..29ea17dc61 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -439,7 +439,9 @@ void FixMinMaxPostQuantization(GraphTransformation* transformation, } // namespace -bool Quantize::Run(Model* model, std::size_t op_index) { +::tensorflow::Status Quantize::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; // Our general "quantization" graph transformation consists in replacing // QuantizedInputArrays[] -> // DequantizeOperators[] -> @@ -460,7 +462,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) { auto& op = *model->operators[op_index]; if (op.type == OperatorType::kDequantize || op.type == OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } // Our assumption here is that the input arrays are already quantized - @@ -497,7 +499,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) { if (!array.minmax && !array.buffer) { LOG(ERROR) << "Can't quantize input array " << input << " because it lacks min/max 
info"; - return false; + return ::tensorflow::Status::OK(); } const auto* other_op = GetOpWithOutput(*model, input); if (other_op && other_op->type != OperatorType::kDequantize) { @@ -507,7 +509,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) { "which means that we should yield and let other ops " "get quantized first", LogName(op), input); - return false; + return ::tensorflow::Status::OK(); } } } @@ -672,7 +674,8 @@ bool Quantize::Run(Model* model, std::size_t op_index) { } } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc index eaa9d3bcda..0c32218ff2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc @@ -51,18 +51,19 @@ bool ApplyAttrsToArray(GraphTransformation* transformation, Model* model, } // end namespace -bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; const auto fakequant_it = model->operators.begin() + op_index; auto* fakequant_base_op = fakequant_it->get(); if (fakequant_base_op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fq_op = static_cast(fakequant_base_op); if (!fq_op->minmax) { // Need to be resolved first by ResolveFakeQuantArgsFromVars. 
- return false; + return ::tensorflow::Status::OK(); } // At this point, this FakeQuantOperator should have a MinMax @@ -74,7 +75,8 @@ bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model, bool changed = false; changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->inputs[0]); changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->outputs[0]); - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc index c3b2709a33..fe8023ab8f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc @@ -25,11 +25,14 @@ limitations under the License. namespace toco { -bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveFinalDequantizeOp::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto dequantize_it = model->operators.begin() + op_index; const auto* dequantize_op = dequantize_it->get(); if (dequantize_op->type != OperatorType::kDequantize) { - return false; + return ::tensorflow::Status::OK(); } const auto& output = dequantize_op->outputs[0]; // We can remove any dequantize op whose output is not consumed by @@ -38,7 +41,7 @@ bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) { // in the middle of the graph might be designated as an output // array. 
if (CountOpsWithInput(*model, output)) { - return false; + return ::tensorflow::Status::OK(); } // If one of the model's output arrays was actually the Dequantize op's @@ -53,7 +56,8 @@ bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) { AddMessageF("Removed final %s", LogName(*dequantize_op)); model->EraseArray(output); model->operators.erase(dequantize_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc index 73ad326299..be8c0acc7b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc @@ -23,11 +23,14 @@ limitations under the License. namespace toco { -bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTensorFlowAssert::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto assert_it = model->operators.begin() + op_index; const auto* assert_op = assert_it->get(); if (assert_op->type != OperatorType::kAssert) { - return false; + return ::tensorflow::Status::OK(); } bool changed = false; @@ -54,7 +57,8 @@ bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) { // That's it. We can stop here, no need to duplicate the work that // RemoveUnusedOp will do removing this now-unused node. 
- return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc index 7ec7752f25..37fe5fa3d7 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc @@ -25,14 +25,18 @@ limitations under the License. namespace toco { -bool RemoveTensorFlowIdentity::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTensorFlowIdentity::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto passthru_it = model->operators.begin() + op_index; const auto* passthru_op = passthru_it->get(); if (passthru_op->type != OperatorType::kIdentity) { - return false; + return ::tensorflow::Status::OK(); } - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc index 0dfdc40e4c..68c6fb65c5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc @@ -46,14 +46,17 @@ bool AreAllBufferElementsEqualTo(const std::vector& buffer_data, // For example, an Add operator is trivial if // one of its operands is constant 0, a Mul operator is trivial // if one of its operands is constant 1, etc. 
-bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialBinaryOperator::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; auto* binary_op = binary_it->get(); if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && binary_op->type != OperatorType::kSub && binary_op->type != OperatorType::kDiv) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(binary_op->inputs.size(), 2); @@ -66,12 +69,12 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { }; if (!is_input_constant[0] && !is_input_constant[1]) { // Neither input is constant, so nothing we can resolve here. - return false; + return ::tensorflow::Status::OK(); } if (is_input_constant[0] && is_input_constant[1]) { // Both inputs are constants. That's a job for constants // propagation, not for us to handle here. - return false; + return ::tensorflow::Status::OK(); } const int index_of_constant_input = is_input_constant[0] ? 0 : 1; const int index_of_variable_input = is_input_constant[0] ? 1 : 0; @@ -84,7 +87,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { const auto& input_array_1 = model->GetArray(binary_op->inputs[1]); if (!input_array_0.has_shape() || !input_array_1.has_shape()) { // Both input shapes must be known. 
- return false; + return ::tensorflow::Status::OK(); } if (input_array_0.shape().dimensions_count() == input_array_1.shape().dimensions_count() && @@ -94,7 +97,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { "(lhs %s, rhs %s)", LogName(*binary_op), ShapeToString(input_array_0.shape()), ShapeToString(input_array_1.shape())); - return false; + return ::tensorflow::Status::OK(); } // Now check if the constant operand makes this binary @@ -103,7 +106,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { model->GetArray(binary_op->inputs[index_of_constant_input]); // For now, we only handle floats here. if (constant_input_array.data_type != ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } const auto& constant_input_float_data = constant_input_array.GetBuffer().data; @@ -121,12 +124,13 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { } if (!is_trivial) { - return false; + return ::tensorflow::Status::OK(); } // Now we know that this node is trivial, so we can remove it. AddMessageF("Removing trivial %s", LogName(*binary_op)); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc index 3ceb93d8ee..faaa2a828e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc @@ -25,16 +25,20 @@ limitations under the License. 
namespace toco { -bool RemoveTrivialConcatenation::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialConcatenation::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto concat_it = model->operators.begin() + op_index; auto* concat_op = concat_it->get(); if (concat_op->type != OperatorType::kConcatenation) { - return false; + return ::tensorflow::Status::OK(); } if (concat_op->inputs.size() != 1) { - return false; + return ::tensorflow::Status::OK(); } - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc index 936854a04f..ccfc181fe0 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc @@ -25,7 +25,10 @@ limitations under the License. namespace toco { -bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialConcatenationInput::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // TensorFlow allows Concatenation nodes to have 0-D inputs, // and they are then treated as empty i.e. omitted from concatenation, // in violation of the notion that 0-D is equivalent to 1x1x1x1. 
@@ -36,7 +39,7 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { const auto concat_it = model->operators.begin() + op_index; auto* concat_op = concat_it->get(); if (concat_op->type != OperatorType::kConcatenation) { - return false; + return ::tensorflow::Status::OK(); } std::vector trivial_inputs; std::vector nontrivial_inputs; @@ -52,7 +55,7 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { } if (trivial_inputs.empty()) { - return false; + return ::tensorflow::Status::OK(); } // Drop trivial inputs. @@ -63,7 +66,8 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { } } concat_op->inputs = nontrivial_inputs; - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc index 2c8d04440f..5448a816bc 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc @@ -64,23 +64,27 @@ bool IsFakeQuantTrivial(GraphTransformation* transformation, const Model& model, } // namespace // Removes FakeQuant ops that are trivial (have no effect, are redundant, etc). 
-bool RemoveTrivialFakeQuant::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialFakeQuant::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; auto* op = op_it->get(); if (op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fakequant_op = static_cast(op); if (!IsFakeQuantTrivial(this, *model, *fakequant_op)) { AddMessageF("%s is not trivial", LogName(*fakequant_op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Removing trivial %s", LogName(*fakequant_op)); CHECK_EQ(fakequant_op->inputs.size(), 1); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc index 752560e075..4133815285 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc @@ -94,12 +94,13 @@ bool IsTrivialFusedActivationFunc( // Attempts to remove both fused and unfused activation functions if the // quantization params indicate that the representable values fall inside the // activation range. 
-bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status RemoveTrivialQuantizedActivationFunc::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* op = it->get(); if (op->inputs.empty()) { - return false; + return ::tensorflow::Status::OK(); } if (IsTrivialUnfusedActivationFunc(this, *model, op->type, op->inputs[0])) { @@ -107,7 +108,8 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, "Removing trivial unfused activation function %s because the input " "minmax imply at least as tight a clamp anyway.", LogName(*op)); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } if (IsTrivialFusedActivationFunc(this, *model, op->fused_activation_function, op->outputs[0])) { @@ -117,9 +119,10 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, "because the output quantization parameters imply at least as tight " "a clamp anyway.", LogName(*op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc index 142c876b15..0f0ae4af69 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc @@ -69,22 +69,26 @@ bool IsTrivialMinMax(GraphTransformation* transformation, const Model& model, // Attempts to remove min/max functions if the quantization params indicate that // the representable values fall inside the clip range. 
-bool RemoveTrivialQuantizedMinMax::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialQuantizedMinMax::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* op = it->get(); if ((op->type != OperatorType::kMinimum && op->type != OperatorType::kMaximum) || op->inputs.size() != 2) { - return false; + return ::tensorflow::Status::OK(); } if (IsTrivialMinMax(this, *model, op->type, op->inputs[0], op->inputs[1])) { AddMessageF( "Removing trivial min/max %s because the quantization parameters imply " "at least as tight a clamp anyway.", LogName(*op)); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc index 5295eeccec..1caf944879 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc @@ -81,22 +81,26 @@ bool IsReshapeTrivial(const Model& model, const Operator& op, } // namespace -bool RemoveTrivialReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto reshape_it = model->operators.begin() + op_index; auto* reshape_op = reshape_it->get(); if (reshape_op->type != OperatorType::kReshape) { - return false; + return ::tensorflow::Status::OK(); } if (!IsReshapeTrivial(*model, *reshape_op, this)) { AddMessageF("%s is not trivial", LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Removing trivial %s", LogName(*reshape_op)); 
CHECK_EQ(reshape_op->inputs.size(), 2); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc index 0cbbcd7c81..dcb0148d58 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc @@ -49,21 +49,24 @@ bool IsSliceTrivial(const Model& model, const Operator& op, } // namespace -bool RemoveTrivialSlice::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialSlice::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto reshape_it = model->operators.begin() + op_index; auto* slice_op = reshape_it->get(); if (slice_op->type != OperatorType::kSlice) { - return false; + return ::tensorflow::Status::OK(); } if (!IsSliceTrivial(*model, *slice_op, this)) { - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Removing trivial %s", LogName(*slice_op)); CHECK_EQ(slice_op->inputs.size(), 3); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc index dde91234a8..3cd5d06bae 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc @@ -25,7 +25,9 @@ limitations under the License. 
namespace toco { -bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveUnusedOp::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* op = it->get(); @@ -58,7 +60,7 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { } for (const string& output_array : model->flags.output_arrays()) { if (output == output_array) { - return false; + return ::tensorflow::Status::OK(); } } for (const auto& rnn_state : model->flags.rnn_states()) { @@ -67,19 +69,19 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { if (!IsDiscardableArray(*model, rnn_state.back_edge_source_array()) || !IsDiscardableArray(*model, rnn_state.state_array()) || CountOpsWithInput(*model, rnn_state.state_array())) { - return false; + return ::tensorflow::Status::OK(); } } } if (CountOpsWithInput(*model, output)) { - return false; + return ::tensorflow::Status::OK(); } } if (op->unresolved_outputs) { AddMessageF("Not discarding %s because it has unresolved outputs.", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Discarding %s because none of its outputs is used.", @@ -105,7 +107,8 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { } } model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc index 550de83018..3c8d411089 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc @@ -63,29 +63,32 @@ bool IsMoveOperator(OperatorType optype) { // Swap elementwise operators such that all value operators occur before all // element move operators, e.g. 
negation then transpose. -bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ReorderElementwiseUnary::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto element_op_it = model->operators.begin() + op_index; std::unique_ptr& element_op = *element_op_it; if (!IsElementwiseOperator(element_op->type)) { - return false; + return ::tensorflow::Status::OK(); } const string intermediate_name = element_op->inputs[0]; auto it = FindOpWithOutput(*model, intermediate_name); if (it == model->operators.end()) { AddMessageF("No preceding operator"); - return false; + return ::tensorflow::Status::OK(); } std::unique_ptr& move_op = *it; if (!IsMoveOperator(move_op->type)) { AddMessageF("Preceding operator is not a move operator"); - return false; + return ::tensorflow::Status::OK(); } if (CountOpsWithInput(*model, intermediate_name) != 1) { AddMessageF("Input %s used elsewhere", intermediate_name); - return false; + return ::tensorflow::Status::OK(); } // Check that the intermediate is discardable. @@ -94,7 +97,7 @@ bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) { "Cannot swap elementwise as it would invalidate %s which is " "an output array.", intermediate_name); - return false; + return ::tensorflow::Status::OK(); } // op->inputs may change so we need to keep a value by copy. @@ -147,7 +150,8 @@ bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) { // Swap the order of the operators. 
element_op.swap(move_op); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc index c907a597cb..a2c06e71e8 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc @@ -101,37 +101,40 @@ std::vector ComputeNewPerm(std::vector input_dims, // Swaps reshape-transpose to transpose-reshape whenever possible. This is // possible when the reshape does not affect memory ordering. -bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ReorderReshapeTranspose::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto transpose_it = model->operators.begin() + op_index; TransposeOperator* transpose_op = ConvertOperator( transpose_it->get(), OperatorType::kTranspose); if (transpose_op == nullptr) { - return false; + return ::tensorflow::Status::OK(); } if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) { // Wait for values to propagate. - return false; + return ::tensorflow::Status::OK(); } // Find the operator that produces the transpose op. auto reshape_it = FindOpWithOutput(*model, transpose_op->inputs[0]); if (reshape_it == model->operators.end()) { - return false; + return ::tensorflow::Status::OK(); } TensorFlowReshapeOperator* reshape_op = ConvertOperator(reshape_it->get(), OperatorType::kReshape); if (reshape_op == nullptr) { - return false; + return ::tensorflow::Status::OK(); } // Ignore if the reshape is uninitialized. if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) { - return false; + return ::tensorflow::Status::OK(); } // Need to copy to keep static if permutated. 
@@ -142,7 +145,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { // Intermediate should not be consumed by any other operators. if (CountOpsWithInput(*model, intermediate_name) != 1) { AddMessageF("Input %s used elsewhere", intermediate_name); - return false; + return ::tensorflow::Status::OK(); } // Check that the intermediate is not an output array. @@ -151,7 +154,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { "Cannot reorder reshape-transpose as it would invalidate %s which is " "an output array.", intermediate_name); - return false; + return ::tensorflow::Status::OK(); } // Get the arrays. @@ -173,7 +176,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { // dimensions then it can be moved between the transpose. if (!ReshapeIsEquivalentToTranspose(*model, reshape_op, true /*allow_extra_unary_dims*/)) { - return false; + return ::tensorflow::Status::OK(); } if (!IsDiscardableArray(*model, output_name)) { @@ -242,7 +245,8 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { // Swap the order of the operators. transpose_it->swap(*reshape_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc index 8f2c1f8162..a79779f55d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc @@ -25,10 +25,13 @@ limitations under the License. 
namespace toco { -bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveBatchNormalization::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto bn_it = model->operators.begin() + op_index; if (bn_it->get()->type != OperatorType::kBatchNormalization) { - return false; + return ::tensorflow::Status::OK(); } const auto* bn_op = static_cast(bn_it->get()); @@ -53,7 +56,7 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { // so we need to exit early if these buffers don't exist (i.e. if the params // haven't yet been resolved as constants). if (!mean_array.buffer || !multiplier_array.buffer || !offset_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } // Create the new Mul, Add operators @@ -142,7 +145,8 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { DCHECK_EQ(bn_it->get(), bn_op); model->operators.erase(bn_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc index b8b35161d7..d039d7d690 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc @@ -24,31 +24,35 @@ limitations under the License. 
namespace toco { -bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveBatchToSpaceNDAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; - if (op_it->get()->type != OperatorType::kBatchToSpaceND) return false; + if (op_it->get()->type != OperatorType::kBatchToSpaceND) + return ::tensorflow::Status::OK(); auto* op = static_cast(op_it->get()); // The attributes are resolved only when the 3 attributes (block_shape, // before_crops, after_crops) are all constant. if (!op->block_shape.empty()) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->inputs.size(), 3); if (!IsConstantParameterArray(*model, op->inputs[1]) || !IsConstantParameterArray(*model, op->inputs[2])) - return false; + return ::tensorflow::Status::OK(); // Handle crops const auto& crops_array = model->GetArray(op->inputs[2]); - if (!crops_array.has_shape()) return false; + if (!crops_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& crops_dims = crops_array.shape().dims(); if (crops_dims.size() != 2) { // Code only handles crops of 2 dimensions. Perhaps another transformation // will delete this op. 
- return false; + return ::tensorflow::Status::OK(); } const std::vector& crops_buffer = crops_array.GetBuffer().data; @@ -59,7 +63,7 @@ bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) { // Handle block_shape const auto& block_shape_array = model->GetArray(op->inputs[1]); - if (!block_shape_array.has_shape()) return false; + if (!block_shape_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& block_shape_dims = block_shape_array.shape().dims(); CHECK_EQ(block_shape_dims.size(), 1); const std::vector& block_shape_buffer = @@ -68,7 +72,8 @@ bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) { op->block_shape.push_back(block_shape_buffer[i]); } - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc index f7e5aa6609..586f546a30 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc @@ -188,7 +188,10 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model, } } // namespace -bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantBinaryOperator::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; const auto* binary_op = binary_it->get(); // Test for binary ops of types that we know how to resolve @@ -204,7 +207,7 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { binary_op->type != OperatorType::kLessEqual && binary_op->type != OperatorType::kGreater && binary_op->type != OperatorType::kGreaterEqual) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(binary_op->inputs.size(), 
2); @@ -212,13 +215,13 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { const auto& input1_array = model->GetArray(binary_op->inputs[1]); // Check if both inputs are constant parameters. if (!input0_array.buffer || !input1_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } auto& output_array = model->GetArray(binary_op->outputs[0]); // Yield until the output array dims have been resolved. if (!output_array.has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // At the moment we don't want to care about fused activation functions. @@ -229,7 +232,7 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { AddMessageF( "Not resolving constant %s because it has a fused activation function", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } // Check that input data types agree. @@ -253,7 +256,8 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { AddMessageF("Resolved constant %s to the equivalent constant array", LogName(*binary_op)); model->operators.erase(binary_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc index d916ae0ddf..0c60fdfeb3 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc @@ -135,11 +135,14 @@ void SetMinMaxForConcatenedArray(GraphTransformation* transformation, } // namespace // Resolves the concatenation operator if all its inputs are constant arrays. 
-bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantConcatenation::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto concat_it = model->operators.begin() + op_index; const auto* concat_base_op = concat_it->get(); if (concat_base_op->type != OperatorType::kConcatenation) { - return false; + return ::tensorflow::Status::OK(); } const auto* concat_op = static_cast(concat_base_op); @@ -149,11 +152,15 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { // We also make sure the shapes of the input arrays are known and they are // all discardable. const Operator* input_op = GetOpWithOutput(*model, input_name); - if (input_op) return false; - if (!IsConstantParameterArray(*model, input_name)) return false; - if (!model->GetArray(input_name).has_shape()) return false; - if (model->GetArray(input_name).quantization_params) return false; - if (!IsDiscardableArray(*model, input_name)) return false; + if (input_op) return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, input_name)) + return ::tensorflow::Status::OK(); + if (!model->GetArray(input_name).has_shape()) + return ::tensorflow::Status::OK(); + if (model->GetArray(input_name).quantization_params) + return ::tensorflow::Status::OK(); + if (!IsDiscardableArray(*model, input_name)) + return ::tensorflow::Status::OK(); } const int concatenation_axis = concat_op->axis; @@ -205,7 +212,8 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { // Remove concatenate operator. 
model->operators.erase(concat_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc index f5f2f77460..4f330fdd84 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc @@ -59,11 +59,14 @@ void GetBoundsForQuantizedDataType(ArrayDataType quantized_data_type, } } -bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantFakeQuant::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto fakequant_it = model->operators.begin() + op_index; const auto* fakequant_base_op = fakequant_it->get(); if (fakequant_base_op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } const auto* fakequant_op = @@ -71,12 +74,12 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { // Yield until the fakequant MinMax has been resolved. if (!fakequant_op->minmax) { - return false; + return ::tensorflow::Status::OK(); } // This transformation only applies when the input array is constant. 
if (!IsConstantParameterArray(*model, fakequant_op->inputs[0])) { - return false; + return ::tensorflow::Status::OK(); } const auto& input_array = model->GetArray(fakequant_op->inputs[0]); @@ -87,7 +90,7 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { if (!InferQuantizedDataTypeFromFakeQuant(*fakequant_op, &quantized_data_type)) { AddMessageF("Unsupported FakeQuant num_bits=%d", fakequant_op->num_bits); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Resolving constant %s", LogName(*fakequant_op)); @@ -136,7 +139,8 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { } model->operators.erase(fakequant_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc index f6f95481b5..5400d395ff 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc @@ -41,11 +41,14 @@ bool ComputeFillArray(Model* model, FillOperator* op) { return true; } -bool ResolveConstantFill::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantFill::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto fill_it = model->operators.begin() + op_index; auto* base_op = fill_it->get(); if (base_op->type != OperatorType::kFill) { - return false; + return ::tensorflow::Status::OK(); } auto* op = static_cast(base_op); @@ -55,44 +58,44 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } if 
(!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes - return false; + return ::tensorflow::Status::OK(); } const auto& val_array = model->GetArray(op->inputs[1]); if (!val_array.has_shape()) { // Yield until the value shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } if (!IsConstantParameterArray(*model, op->inputs[1])) { // Yield until the value is constant. - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(RequiredBufferSizeForShape(val_array.shape()), 1); switch (output_array.data_type) { case ArrayDataType::kFloat: if (!ComputeFillArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kUint8: if (!ComputeFillArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kInt32: if (!ComputeFillArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kInt64: if (!ComputeFillArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; default: @@ -114,7 +117,8 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) { // Erase the operator model->operators.erase(fill_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc index 36d7dad0ce..6e3a6a69c2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc @@ -61,11 +61,14 @@ inline void Gather(const Array& input_array, int input_rank, // Resolves a constant Gather operation. // This simply performs the gather and produces the output array with the // appropriate values. 
-bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantGather::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kGather) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -74,28 +77,28 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } if (!op->axis) { // Yield until axis has been set by ResolveGatherAttributes. - return false; + return ::tensorflow::Status::OK(); } if (op->axis.value() != 0) { // Only handling axis=0 for now. AddMessageF("%s has axis %d; only axis=0 is supported", LogName(*op), op->axis.value()); - return false; + return ::tensorflow::Status::OK(); } // We require constant inputs. if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(op->inputs[0]); const Array& coords_array = model->GetArray(op->inputs[1]); @@ -142,7 +145,8 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { // Erase the operator. 
model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc index e86616574d..e257ec37e8 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc @@ -49,11 +49,14 @@ void Pack(Model* model, PackOperator const& op) { } // namespace -bool ResolveConstantPack::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantPack::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kPack) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -62,18 +65,18 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes - return false; + return ::tensorflow::Status::OK(); } for (const auto& input : op->inputs) { if (!IsConstantParameterArray(*model, input)) { // Yield if any input is mutable - return false; + return ::tensorflow::Status::OK(); } } @@ -111,7 +114,8 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) { // Erase the operator model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc 
b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc index 88d06d7dc7..db0fbba528 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc @@ -59,11 +59,14 @@ bool ComputeRandomUniformArray(Model* model, RandomUniformOperator* op) { return true; } -bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantRandomUniform::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* base_op = it->get(); if (base_op->type != OperatorType::kRandomUniform) { - return false; + return ::tensorflow::Status::OK(); } auto* op = static_cast(base_op); @@ -73,12 +76,12 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes - return false; + return ::tensorflow::Status::OK(); } if ((op->seed == 0) && (op->seed2 == 0)) { @@ -86,13 +89,13 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { << "\" is truly random (using /dev/random system entropy). " "Therefore, cannot resolve as constant. Set \"seed\" or " "\"seed2\" attr non-zero to fix this"; - return false; + return ::tensorflow::Status::OK(); } switch (output_array.data_type) { case ArrayDataType::kFloat: if (!ComputeRandomUniformArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; // For future support of double or half. 
@@ -110,7 +113,8 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { // Erase the operator model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc index 1a0ba9e2bc..069d4dafaa 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc @@ -19,11 +19,14 @@ limitations under the License. namespace toco { -bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantRange::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* base_op = it->get(); if (base_op->type != OperatorType::kRange) { - return false; + return ::tensorflow::Status::OK(); } auto* op = static_cast(base_op); @@ -31,23 +34,23 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { const auto& start_array = model->GetArray(op->inputs[0]); if (!start_array.has_shape()) { // Yield until all input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } const auto& limit_array = model->GetArray(op->inputs[1]); if (!limit_array.has_shape()) { // Yield until all input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } const auto& delta_array = model->GetArray(op->inputs[2]); if (!delta_array.has_shape()) { // Yield until all input dims have been resolved. 
- return false; + return ::tensorflow::Status::OK(); } for (const auto& input : op->inputs) { if (!IsConstantParameterArray(*model, input)) { // yield if any input is mutable - return false; + return ::tensorflow::Status::OK(); } } @@ -55,7 +58,7 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(RequiredBufferSizeForShape(start_array.shape()), 1) @@ -101,7 +104,8 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { // Delete the operator model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc index a6f665b5f0..fccecef600 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc @@ -22,11 +22,14 @@ limitations under the License. namespace toco { // Resolves a constant reshape operation by copying the buffer. -bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kReshape) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -36,17 +39,17 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { // We require constant inputs. 
if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { - return false; + return ::tensorflow::Status::OK(); } auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(op->inputs[0]); @@ -54,7 +57,7 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { AddMessageF("Constant reshape is non-trivial (%s -> %s)", ShapeToString(input_array.shape()), ShapeToString(output_array.shape())); - return false; + return ::tensorflow::Status::OK(); } CHECK(!output_array.buffer); @@ -95,7 +98,7 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { default: LOG(FATAL) << "Unsupported data type: " << ArrayDataTypeName(input_array.data_type); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Resolving constant reshape of %s", LogName(*op)); @@ -112,7 +115,8 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { // Erase the operator. model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc index e880a3f44d..ab1e0bd7a0 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc @@ -27,11 +27,14 @@ namespace toco { // This implementation is looking strictly for all-or-nothing on the select // condition. 
It's possible to enhance this by looking per-element and possibly // producing a Mul op. -bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantSelect::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kSelect) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -40,23 +43,23 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } // We require the cond input to be constant. if (!IsConstantParameterArray(*model, op->inputs[0])) { - return false; + return ::tensorflow::Status::OK(); } const Array& cond_array = model->GetArray(op->inputs[0]); CHECK(cond_array.data_type == ArrayDataType::kBool) << "Only bool conditions are supported"; const auto& cond_data = cond_array.GetBuffer().data; if (cond_data.empty()) { - return false; + return ::tensorflow::Status::OK(); } // Check if the condition is the same for all elements. @@ -67,12 +70,14 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) { "Cannot resolve %s as constant; cond_array has differing " "per-element values", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } } // Pass-through the selected input. - return RemoveTrivialPassthroughOp(this, model, op_index, cond_value ? 1 : 2); + *modified = + RemoveTrivialPassthroughOp(this, model, op_index, cond_value ? 
1 : 2); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc index 8a0e3e8995..a1756a8207 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc @@ -19,29 +19,32 @@ limitations under the License. namespace toco { -bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantShapeOrRank::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* op = it->get(); if (!(op->type == OperatorType::kShape || op->type == OperatorType::kRank)) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->outputs.size(), 1); auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been resolved - return false; + return ::tensorflow::Status::OK(); } const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // Yield until the input array's shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been resolved. 
- return false; + return ::tensorflow::Status::OK(); } // Compute the output @@ -65,7 +68,8 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) { } model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc index b35c3e19c4..869dfae98e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc @@ -86,11 +86,14 @@ bool Slice(SliceOperator const& op, Array const& input_array, } // namespace -bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantSlice::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kSlice) { - return false; + return ::tensorflow::Status::OK(); } const SliceOperator* op = static_cast(base_op); @@ -99,49 +102,49 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } if (op->begin.empty() || op->size.empty()) { // Attributes have not resolved yet. - return false; + return ::tensorflow::Status::OK(); } const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // Yield until the value shape has been resolved. 
- return false; + return ::tensorflow::Status::OK(); } if (!IsConstantParameterArray(*model, op->inputs[0])) { // Yield until the value is constant. - return false; + return ::tensorflow::Status::OK(); } CHECK(!output_array.buffer); switch (output_array.data_type) { case ArrayDataType::kFloat: if (!Slice(*op, input_array, &output_array)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kUint8: if (!Slice(*op, input_array, &output_array)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kInt32: if (!Slice(*op, input_array, &output_array)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kInt64: if (!Slice(*op, input_array, &output_array)) { - return false; + return ::tensorflow::Status::OK(); } break; default: @@ -159,7 +162,8 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) { // Erase the operator model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc index 8853ed87e6..99c5a64662 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc @@ -103,11 +103,14 @@ void StridedSlice(StridedSliceOperator const& op, Array const& input_array, } // anonymous namespace -bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantStridedSlice::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kStridedSlice) { - return false; + return ::tensorflow::Status::OK(); } const 
StridedSliceOperator* op = @@ -117,28 +120,28 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes - return false; + return ::tensorflow::Status::OK(); } if (op->start_indices.empty() || op->stop_indices.empty() || op->strides.empty()) { // Attributes have not resolved yet. - return false; + return ::tensorflow::Status::OK(); } const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // Yield until the value shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } if (!IsConstantParameterArray(*model, op->inputs[0])) { // Yield until the value is constant. - return false; + return ::tensorflow::Status::OK(); } CHECK(!output_array.buffer); @@ -164,7 +167,8 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) { DeleteOpAndArraysIfUnused(model, it->get()); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc index 5cfa1a5582..c5e93c9bad 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc @@ -97,11 +97,14 @@ inline void Tile(const Array& input_array, const Array& multiples_array, } // namespace // Resolves a constant Tile operation. 
-bool ResolveConstantTile::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantTile::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kTile) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -110,17 +113,17 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } // We require constant inputs. if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(op->inputs[0]); const Array& multiples_array = model->GetArray(op->inputs[1]); @@ -159,7 +162,8 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) { // Erase the operator. 
model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc index fe15dfa06f..b759c4d6dd 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc @@ -101,11 +101,14 @@ void Transpose(Model* model, const Array& input_array, } // namespace -bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantTranspose::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kTranspose) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -114,17 +117,17 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } // We require constant inputs. if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(op->inputs[0]); @@ -132,7 +135,7 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { if (op->perm.empty()) { // Yield until perm has been populated by ResolveTransposeAttributes. 
- return false; + return ::tensorflow::Status::OK(); } // We currently only support 1-4 dimensions. @@ -174,7 +177,8 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { // Erase the operator. model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index 5364eebbc9..3034c1b1eb 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -112,7 +112,10 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) { return true; } -bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantUnaryOperator::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto unary_it = model->operators.begin() + op_index; const auto* unary_op = unary_it->get(); // Test for unary ops of types that we know how to resolve. @@ -133,28 +136,28 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { case OperatorType::kRelu: break; default: - return false; + return ::tensorflow::Status::OK(); } // Check if the input is a constant parameter. 
if (!IsConstantParameterArray(*model, unary_op->inputs[0])) { - return false; + return ::tensorflow::Status::OK(); } // if the unary op involves a tensor required by a rnn state, ignore it for (const auto& rnn_state : model->flags.rnn_states()) { if (unary_op->inputs[0] == rnn_state.back_edge_source_array()) { - return false; + return ::tensorflow::Status::OK(); } if (unary_op->inputs[0] == rnn_state.state_array()) { - return false; + return ::tensorflow::Status::OK(); } } auto& output_array = model->GetArray(unary_op->outputs[0]); if (!output_array.has_shape()) { // Yield until the output array dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } // At the moment we don't want to care about fused activation functions. @@ -166,7 +169,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { "Not resolving constant %s " " because it has a fused activation function", LogName(*unary_op)); - return false; + return ::tensorflow::Status::OK(); } // The min-max is only copied for ops that copy data without arithmetic. 
@@ -187,7 +190,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { "Not resolving constant %s because we currently only support casting " "to float", LogName(*unary_op)); - return false; + return ::tensorflow::Status::OK(); } if (cast_op->src_data_type != input_array.buffer->type) { AddMessageF( @@ -197,7 +200,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { } } else { if (input_array.buffer->type != ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } input_float_data = &(input_array.GetBuffer().data); } @@ -239,7 +242,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { CHECK_EQ(unary_op->inputs.size(), 2) << "Sum needs 2 inputs"; if (!IsConstantParameterArray(*model, unary_op->inputs[1])) { AddMessageF("Axis input is non-constant"); - return false; + return ::tensorflow::Status::OK(); } auto& axis_array = model->GetArray(unary_op->inputs[1]); CHECK(axis_array.data_type == ArrayDataType::kInt32); @@ -336,7 +339,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { default: LOG(FATAL) << "Unsupported activation function " << LogName(*unary_op); - return false; + return ::tensorflow::Status::OK(); } output_float_data[i] = new_value; } @@ -351,7 +354,8 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { AddMessageF("Resolved constant %s to the equivalent constant array", LogName(*unary_op)); model->operators.erase(unary_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc index 0dda1fd0b3..eed971c1d5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc +++ 
b/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc @@ -25,17 +25,20 @@ limitations under the License. namespace toco { -bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveFakeQuantArgsFromVars::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto fakequant_it = model->operators.begin() + op_index; auto* fakequant_base_op = fakequant_it->get(); if (fakequant_base_op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fakequant_op = static_cast(fakequant_base_op); if (fakequant_op->minmax) { // Already resolved. - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(fakequant_op->inputs.size(), 3); @@ -43,7 +46,7 @@ bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) { // resolved to constant arrays. for (int i = 1; i <= 2; i++) { if (!IsConstantParameterArray(*model, fakequant_op->inputs[i])) { - return false; + return ::tensorflow::Status::OK(); } } @@ -74,7 +77,8 @@ bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) { DeleteArrayIfUsedOnce(fakequant_op->inputs[i], model); } fakequant_op->inputs.resize(1); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc index ce825c91af..69209b8dec 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc @@ -24,20 +24,25 @@ limitations under the License. 
namespace toco { -bool ResolveGatherAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveGatherAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto* gather_op = model->operators[op_index].get(); - if (gather_op->type != OperatorType::kGather) return false; + if (gather_op->type != OperatorType::kGather) + return ::tensorflow::Status::OK(); auto* op = static_cast(gather_op); if (op->axis) { // Attributes already resolved - return false; + return ::tensorflow::Status::OK(); } - if (op->inputs.size() != 3) return false; - if (!IsConstantParameterArray(*model, op->inputs[2])) return false; + if (op->inputs.size() != 3) return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, op->inputs[2])) + return ::tensorflow::Status::OK(); const auto& indices_array = model->GetArray(op->inputs[2]); - if (!indices_array.has_shape()) return false; + if (!indices_array.has_shape()) return ::tensorflow::Status::OK(); const auto& axis_data = indices_array.GetBuffer().data; CHECK_EQ(axis_data.size(), 1) << "Multidimensional gather not supported on " << LogName(*op); @@ -47,7 +52,8 @@ bool ResolveGatherAttributes::Run(Model* model, std::size_t op_index) { DeleteArrayIfUsedOnce(op->inputs[2], model); op->inputs.resize(2); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc index b2b2ea151b..ac94f45321 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc @@ -51,27 +51,30 @@ void FillArrayWithZeros(Array* array) { // Removes a multiplication by array of constant zeros by making the output // array an array of constant zeros and removing the input arrays if they are 
no // longer needed. -bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveMultiplyByZero::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto mul_it = model->operators.begin() + op_index; auto* mul_op = mul_it->get(); if (mul_op->type != OperatorType::kMul) { - return false; + return ::tensorflow::Status::OK(); } const auto& output_array_name = mul_op->outputs[0]; auto& output_array = model->GetArray(output_array_name); if (!IsDiscardableArray(*model, output_array_name)) { - return false; + return ::tensorflow::Status::OK(); } if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } // Yield if the output shape is not known yet. if (!output_array.has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // This transformation only handles the case where one operand is all 0's and @@ -83,12 +86,12 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { }; if (!is_input_constant[0] && !is_input_constant[1]) { // Neither input is constant, so nothing we can resolve here. - return false; + return ::tensorflow::Status::OK(); } if (is_input_constant[0] && is_input_constant[1]) { // Both inputs are constants. That's a job for constants propagation, not // for us to handle here. - return false; + return ::tensorflow::Status::OK(); } const int index_of_constant_input = is_input_constant[0] ? 0 : 1; const int index_of_variable_input = is_input_constant[0] ? 
1 : 0; @@ -105,7 +108,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { constant_input_array.GetBuffer().data; if (!AreAllBufferElementsZero>( constant_input_data)) { - return false; + return ::tensorflow::Status::OK(); } FillArrayWithZeros(&output_array); } break; @@ -114,7 +117,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { constant_input_array.GetBuffer().data; if (!AreAllBufferElementsZero>( constant_input_data)) { - return false; + return ::tensorflow::Status::OK(); } FillArrayWithZeros(&output_array); } break; @@ -123,7 +126,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { constant_input_array.GetBuffer().data; if (!AreAllBufferElementsZero>( constant_input_data)) { - return false; + return ::tensorflow::Status::OK(); } FillArrayWithZeros(&output_array); } break; @@ -132,14 +135,14 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { constant_input_array.GetBuffer().data; if (!AreAllBufferElementsZero>( constant_input_data)) { - return false; + return ::tensorflow::Status::OK(); } FillArrayWithZeros(&output_array); } break; default: AddMessageF( "Cannot resolve multiply by 0 because of unsupported data type\n"); - return false; + return ::tensorflow::Status::OK(); } // Erase input arrays to the multiply if no longer used @@ -149,7 +152,8 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { // Erase the multiply operator. 
model->operators.erase(mul_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc index 8a8e723cf7..adc87753bc 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc @@ -24,19 +24,23 @@ limitations under the License. namespace toco { -bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolvePadAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto pad_it = model->operators.begin() + op_index; auto* pad_op = pad_it->get(); - if (pad_op->type != OperatorType::kPad) return false; + if (pad_op->type != OperatorType::kPad) return ::tensorflow::Status::OK(); auto* op = static_cast(pad_op); - if (!op->left_padding.empty()) return false; + if (!op->left_padding.empty()) return ::tensorflow::Status::OK(); CHECK_EQ(op->inputs.size(), 2); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); const auto& array = model->GetArray(op->inputs[1]); - if (!array.has_shape()) return false; + if (!array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& dims = array.shape().dims(); CHECK_EQ(dims.size(), 2); @@ -50,6 +54,7 @@ bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) { // TODO(dkalenichenko): Delete the extra input? 
- return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc index ebb023e342..1f0f17a37a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc @@ -24,19 +24,23 @@ limitations under the License. namespace toco { -bool ResolvePadV2Attributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolvePadV2Attributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto pad_it = model->operators.begin() + op_index; auto* pad_op = pad_it->get(); - if (pad_op->type != OperatorType::kPadV2) return false; + if (pad_op->type != OperatorType::kPadV2) return ::tensorflow::Status::OK(); auto* op = static_cast(pad_op); - if (!op->left_padding.empty()) return false; + if (!op->left_padding.empty()) return ::tensorflow::Status::OK(); CHECK_EQ(op->inputs.size(), 3); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); const auto& array = model->GetArray(op->inputs[1]); - if (!array.has_shape()) return false; + if (!array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& dims = array.shape().dims(); CHECK_EQ(dims.size(), 2); @@ -50,6 +54,7 @@ bool ResolvePadV2Attributes::Run(Model* model, std::size_t op_index) { // TODO(dkalenichenko): Delete the extra input? 
- return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc index 73198ac7c0..c3246ab90f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc @@ -39,23 +39,37 @@ bool ResolveAttributes(Model* model, T* op) { return true; } -bool ResolveReduceAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveReduceAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; Operator* op = model->operators[op_index].get(); switch (op->type) { case OperatorType::kMean: - return ResolveAttributes(model, static_cast(op)); + *modified = ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kSum: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kReduceProd: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kReduceMin: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kReduceMax: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kAny: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); default: - return false; + return ::tensorflow::Status::OK(); } } diff --git 
a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc index 8e150db6fa..ee5c4810e6 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc @@ -78,11 +78,13 @@ void ReorderAxes(AxesOrder input_axes_order, AxesOrder output_axes_order, } } -bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveReorderAxes::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); if (op->type != OperatorType::kReorderAxes) { - return false; + return ::tensorflow::Status::OK(); } auto* reorder_op = static_cast(op); @@ -93,11 +95,11 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) { auto& input_array = model->GetArray(input_array_name); auto& output_array = model->GetArray(output_array_name); if (!input_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } // Yield until output dims have been resolved. 
if (!output_array.has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // Reorder the input array dims and buffer data if (input_array.buffer->type == ArrayDataType::kFloat) { @@ -120,7 +122,8 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) { DeleteOpAndArraysIfUnused(model, op); RenameArray(model, output_array_name, input_array_name); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc index b615c9a545..7b7a59264f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc @@ -25,25 +25,29 @@ limitations under the License. namespace toco { -bool ResolveReshapeAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveReshapeAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto reshape_it = model->operators.begin() + op_index; auto* reshape_op = reshape_it->get(); if (reshape_op->type != OperatorType::kReshape) { - return false; + return ::tensorflow::Status::OK(); } auto* op = static_cast(reshape_op); - if (!op->shape.empty()) return false; + if (!op->shape.empty()) return ::tensorflow::Status::OK(); if (IsConstantParameterArray(*model, reshape_op->inputs[1])) { const auto& constant_input_array = model->GetArray(reshape_op->inputs[1]); op->shape = constant_input_array.GetBuffer().data; } - if (op->shape.empty()) return false; + if (op->shape.empty()) return ::tensorflow::Status::OK(); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc 
b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc index e760d08e5a..5a838168de 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc @@ -24,29 +24,35 @@ limitations under the License. namespace toco { -bool ResolveSliceAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveSliceAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto slice_it = model->operators.begin() + op_index; auto* slice_op = slice_it->get(); - if (slice_op->type != OperatorType::kSlice) return false; + if (slice_op->type != OperatorType::kSlice) return ::tensorflow::Status::OK(); auto* op = static_cast(slice_op); - if (!op->begin.empty()) return false; + if (!op->begin.empty()) return ::tensorflow::Status::OK(); CHECK_EQ(op->inputs.size(), 3); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; - if (!IsConstantParameterArray(*model, op->inputs[2])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, op->inputs[2])) + return ::tensorflow::Status::OK(); const auto& begin_array = model->GetArray(op->inputs[1]); - if (!begin_array.has_shape()) return false; + if (!begin_array.has_shape()) return ::tensorflow::Status::OK(); const auto& size_array = model->GetArray(op->inputs[2]); - if (!size_array.has_shape()) return false; + if (!size_array.has_shape()) return ::tensorflow::Status::OK(); op->begin = begin_array.GetBuffer().data; op->size = size_array.GetBuffer().data; // TODO(dkalenichenko): Delete the extra inputs? 
- return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc index fab50bec1f..3804145c4f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc @@ -24,16 +24,20 @@ limitations under the License. namespace toco { -bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveSpaceToBatchNDAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; - if (op_it->get()->type != OperatorType::kSpaceToBatchND) return false; + if (op_it->get()->type != OperatorType::kSpaceToBatchND) + return ::tensorflow::Status::OK(); auto* op = static_cast(op_it->get()); // The attributes are resolved only when the 3 attributes (block_shape, // before_paddings, after_paddings) are all constant. if (!op->block_shape.empty()) { - return false; + return ::tensorflow::Status::OK(); } const int block_shape_index = 1; @@ -42,16 +46,16 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) { CHECK_EQ(op->inputs.size(), 3); if (!IsConstantParameterArray(*model, op->inputs[block_shape_index]) || !IsConstantParameterArray(*model, op->inputs[paddings_index])) - return false; + return ::tensorflow::Status::OK(); // Handle paddings. const auto& paddings_array = model->GetArray(op->inputs[paddings_index]); - if (!paddings_array.has_shape()) return false; + if (!paddings_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& paddings_dims = paddings_array.shape().dims(); if (paddings_dims.size() != 2) { // Code only handles padding of 2 dimensions. 
Perhaps another transformation // will delete this op. - return false; + return ::tensorflow::Status::OK(); } const std::vector& paddings_buffer = paddings_array.GetBuffer().data; @@ -63,7 +67,7 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) { // Handle block_shape. const auto& block_shape_array = model->GetArray(op->inputs[block_shape_index]); - if (!block_shape_array.has_shape()) return false; + if (!block_shape_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& block_shape_dims = block_shape_array.shape().dims(); CHECK_EQ(block_shape_dims.size(), 1); const std::vector& block_shape_buffer = @@ -72,7 +76,8 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) { op->block_shape.push_back(block_shape_buffer[i]); } - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc index e8bb85704e..c601b0774e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc @@ -25,10 +25,13 @@ limitations under the License. 
namespace toco { -bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveSqueezeAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto* squeeze_op = model->operators[op_index].get(); if (squeeze_op->type != OperatorType::kSqueeze) { - return false; + return ::tensorflow::Status::OK(); } DCHECK_EQ(squeeze_op->inputs.size(), 1); DCHECK_EQ(squeeze_op->outputs.size(), 1); @@ -42,10 +45,11 @@ bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) { "Reshape op", LogName(*squeeze_op)); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc index 65132d7d1e..f54f5b42a1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc @@ -37,40 +37,47 @@ int PadAttributeArray(Array* attribute_array, std::vector pad_values, return mask; } -bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveStridedSliceAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto slice_it = model->operators.begin() + op_index; auto* slice_op = slice_it->get(); - if (slice_op->type != OperatorType::kStridedSlice) return false; + if (slice_op->type != OperatorType::kStridedSlice) + return ::tensorflow::Status::OK(); auto* op = static_cast(slice_op); if (!op->start_indices.empty()) { // We have already resolved these attributes - return false; + return ::tensorflow::Status::OK(); } 
CHECK_EQ(op->inputs.size(), 4); const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // We require the dimensionality of the input to pad the indices - return false; + return ::tensorflow::Status::OK(); } auto& start_array = model->GetArray(op->inputs[1]); - if (!start_array.has_shape()) return false; + if (!start_array.has_shape()) return ::tensorflow::Status::OK(); if (toco::RequiredBufferSizeForShape(start_array.shape()) > 4) { // Only 1-4D arrays are supported for now. - return false; + return ::tensorflow::Status::OK(); } auto& stop_array = model->GetArray(op->inputs[2]); - if (!stop_array.has_shape()) return false; + if (!stop_array.has_shape()) return ::tensorflow::Status::OK(); auto& stride_array = model->GetArray(op->inputs[3]); - if (!stride_array.has_shape()) return false; + if (!stride_array.has_shape()) return ::tensorflow::Status::OK(); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; - if (!IsConstantParameterArray(*model, op->inputs[2])) return false; - if (!IsConstantParameterArray(*model, op->inputs[3])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, op->inputs[2])) + return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, op->inputs[3])) + return ::tensorflow::Status::OK(); int num_input_axes = input_array.shape().dimensions_count(); int start_indices_size = start_array.shape().dims(0); @@ -112,6 +119,7 @@ bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) { op->stop_indices = stop_array.GetBuffer().data; op->strides = stride_array.GetBuffer().data; - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc index fa5ee89933..4927ccd95d 100644 
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc @@ -25,12 +25,15 @@ limitations under the License. namespace toco { -bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTensorFlowConcat::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto concat_it = model->operators.begin() + op_index; const auto* tf_concat_op = concat_it->get(); if (tf_concat_op->type != OperatorType::kConcat && tf_concat_op->type != OperatorType::kConcatV2) { - return false; + return ::tensorflow::Status::OK(); } CHECK_GE(tf_concat_op->inputs.size(), 2); @@ -54,7 +57,7 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) { if (!axis_array.buffer) { AddMessageF("Waiting for the axis of %s to be resolved to a constant", LogName(*tf_concat_op)); - return false; + return ::tensorflow::Status::OK(); } CHECK(axis_array.data_type == ArrayDataType::kInt32); @@ -79,7 +82,8 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) { } // Remove the TensorFlowConcat op model->operators.erase(concat_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc index 65346c4fe4..da039da546 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc @@ -55,10 +55,13 @@ TransposeOperator* FindTransposeOpWithInput(const Model& model, } // namespace -bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTensorFlowMatMul::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; 
auto matmul_it = model->operators.begin() + op_index; if (matmul_it->get()->type != OperatorType::kMatMul) { - return false; + return ::tensorflow::Status::OK(); } const auto* matmul_op = static_cast(matmul_it->get()); @@ -73,7 +76,7 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { "Not replacing %s by a FullyConnected operator, because it has " "the transpose_a attribute", LogName(*matmul_op)); - return false; + return ::tensorflow::Status::OK(); } // Reorder the axes on the second input. TensorFlow uses row-major ordering @@ -198,7 +201,8 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { // erase the MatMul operator model->operators.erase(matmul_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc index 4edffe3d48..9beea3e937 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc @@ -24,11 +24,14 @@ limitations under the License. namespace toco { -bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTensorFlowMerge::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto merge_it = model->operators.begin() + op_index; const auto* merge_op = merge_it->get(); if (merge_op->type != OperatorType::kMerge) { - return false; + return ::tensorflow::Status::OK(); } // We need to yield until this Merge node has only 1 input, which will mean @@ -37,7 +40,7 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { // non-selected inputs, so that at some point there will be only 1 input left. 
if (merge_op->inputs.size() > 1) { AddMessageF("Waiting for %s to be resolved", LogName(*merge_op)); - return false; + return ::tensorflow::Status::OK(); } // Now that the merge node has 1 input exactly, it is the same as an Identity @@ -57,7 +60,8 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { AddMessageF("Removing already-resolved %s", LogName(*merge_op)); model->EraseArray(merge_op->outputs[0]); model->operators.erase(merge_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc index 8bef440afd..e215981b42 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc @@ -24,11 +24,14 @@ limitations under the License. namespace toco { -bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTensorFlowSwitch::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto switch_it = model->operators.begin() + op_index; const auto* switch_op = switch_it->get(); if (switch_op->type != OperatorType::kSwitch) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(switch_op->inputs.size(), 2); @@ -40,7 +43,7 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) { AddMessageF( "Waiting for the boolean predicate of %s to be resolved to a constant", LogName(*switch_op)); - return false; + return ::tensorflow::Status::OK(); } // The predicate should be boolean, and should consist of a single value. @@ -119,7 +122,8 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) { // Remove the switch node itself. 
AddMessageF("Removing already-resolved %s", LogName(*switch_op)); model->operators.erase(switch_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc index a657ee00af..aa7945391c 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc @@ -24,19 +24,24 @@ limitations under the License. namespace toco { -bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTransposeAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; - if (op_it->get()->type != OperatorType::kTranspose) return false; + if (op_it->get()->type != OperatorType::kTranspose) + return ::tensorflow::Status::OK(); auto* op = static_cast(op_it->get()); - if (!op->perm.empty()) return false; + if (!op->perm.empty()) return ::tensorflow::Status::OK(); CHECK_EQ(op->inputs.size(), 2); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); // Handling perm. 
const auto& perm_array = model->GetArray(op->inputs[1]); - if (!perm_array.has_shape()) return false; + if (!perm_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& perm_dims = perm_array.shape().dims(); CHECK_EQ(perm_dims.size(), 1); @@ -47,7 +52,8 @@ bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) { op->perm.push_back(perm_buffer[i]); } - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc b/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc index 22c258cec5..e9f24a29ab 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc @@ -24,15 +24,17 @@ limitations under the License. namespace toco { -bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ShuffleFCWeights::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; Operator* op = model->operators[op_index].get(); if (op->type != OperatorType::kFullyConnected) { - return false; + return ::tensorflow::Status::OK(); } FullyConnectedOperator* fc_op = static_cast(op); // Exit if this FC op already has shuffled weights if (fc_op->weights_format != FullyConnectedWeightsFormat::kDefault) { - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(fc_op->inputs[0]); const string& weights_name = fc_op->inputs[1]; @@ -46,11 +48,11 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { output_array.data_type != ArrayDataType::kInt16 || !input_array.quantization_params || !weights_array.quantization_params || !output_array.quantization_params) { - return false; + return ::tensorflow::Status::OK(); } // Exit if the shapes aren't known if (!input_array.has_shape() || !weights_array.has_shape()) { - return false; + 
return ::tensorflow::Status::OK(); } // Exit if, based on the known shapes, this FC op is not a GEMV. // The shuffling of FC weights is only useful to enable fast GEMV paths. @@ -64,7 +66,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { "the input shape is not 1D or 2D (possibly with additional inner " "dimensions of size 1)", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } } if (input_shape.dims(0) != 1 && input_shape.dims(0) != 4) { @@ -73,7 +75,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { "the input shape's leading dimension, i.e. the 'batch size', is not " "equal to 1 or 4", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } // Exit if the weights shape isn't an integral multiple of the shuffled // block shape, 4x16. We don't want to have to write code dealing with @@ -88,7 +90,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { // two. const Shape& weights_shape = weights_array.shape(); if (weights_shape.dimensions_count() != 2) { - return false; + return ::tensorflow::Status::OK(); } const int rows = weights_shape.dims(0); const int cols = weights_shape.dims(1); @@ -97,11 +99,11 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { "Not applying experimental shuffling to the weights of %s because its " "shape isn't a multiple of the shuffling block shape, 4x16", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } // Exit if the weights aren't already a constant array. if (!weights_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } // Exit if the weights are used by more than one op. 
if (CountOpsWithInput(*model, weights_name) != 1) { @@ -109,7 +111,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { "Not applying experimental shuffling to the weights of %s because that " "array is consumed by other operators", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } // Compute the shuffled weights auto& weights_data = @@ -152,7 +154,8 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { shuffled_input_workspace_array.GetOrCreateQuantizationParams() = input_array.GetQuantizationParams(); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc index 66cfed4ac2..e2a6f12481 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc @@ -166,7 +166,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) { GraphTransformationsSet graph_transformation_set; graph_transformation_set.Add(new toco::ResolveConstantConcatenation); EXPECT_THAT(model.GetArrayMap().size(), 5); - (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + bool modified; + ASSERT_TRUE((*graph_transformation_set.begin()) + ->Run(&model, /*op_index=*/0, &modified) + .ok()); EXPECT_THAT(model.GetArrayMap().size(), 1); auto& concatenated_array = (*model.GetArrayMap().begin()).second; @@ -185,7 +188,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) { GraphTransformationsSet graph_transformation_set; graph_transformation_set.Add(new toco::ResolveConstantConcatenation); EXPECT_THAT(model.GetArrayMap().size(), 5); - (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + bool modified; + 
ASSERT_TRUE((*graph_transformation_set.begin()) + ->Run(&model, /*op_index=*/0, &modified) + .ok()); EXPECT_THAT(model.GetArrayMap().size(), 1); auto& concatenated_array = (*model.GetArrayMap().begin()).second; @@ -204,7 +210,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis2) { GraphTransformationsSet graph_transformation_set; graph_transformation_set.Add(new toco::ResolveConstantConcatenation); EXPECT_THAT(model.GetArrayMap().size(), 5); - (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + bool modified; + ASSERT_TRUE((*graph_transformation_set.begin()) + ->Run(&model, /*op_index=*/0, &modified) + .ok()); EXPECT_THAT(model.GetArrayMap().size(), 1); auto& concatenated_array = (*model.GetArrayMap().begin()).second; diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc index a53abc9941..57d85a0435 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc @@ -50,7 +50,8 @@ void RunResolveSum(const std::vector& input, sum_op->inputs = {"input0", "input1"}; sum_op->outputs = {"output"}; model.operators.push_back(std::move(sum_op)); - ResolveConstantUnaryOperator().Run(&model, 0); + bool modified; + ASSERT_TRUE(ResolveConstantUnaryOperator().Run(&model, 0, &modified).ok()); EXPECT_EQ(model.GetArray("output").GetBuffer().data, expected_output); EXPECT_EQ(model.GetArray("output").shape().dims(), output_shape); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc index 69bad2fa89..4ada5c3fd0 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc +++ 
b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc @@ -25,13 +25,16 @@ limitations under the License. namespace toco { -bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) { +::tensorflow::Status UnfuseActivationFunctions::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* op = it->get(); // If a conv operation has an im2col array, yield: it should be dropped first. if ((op->type == OperatorType::kConv) && (op->outputs.size() == 2)) { - return false; + return ::tensorflow::Status::OK(); } Operator* ac_op = nullptr; @@ -46,7 +49,7 @@ bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) { ac_op = new Relu1Operator; break; default: - return false; + return ::tensorflow::Status::OK(); } // At this point we know that the op has a fused activation function. At the @@ -74,7 +77,8 @@ bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) { ac_op->inputs = {tmp_array_name}; op->outputs = {tmp_array_name}; - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc index dd9e26e68b..e19527968d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc @@ -22,7 +22,10 @@ limitations under the License. namespace toco { -bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { +::tensorflow::Status UnpartitionEmbeddingLookup::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // Collapses a partitioned tf.nn.embedding_lookup back into a single Gather. 
// https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup // This transform attempts to identify the len(params) > 1 case and collapse @@ -47,7 +50,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { // First look for the final DynamicStitch. auto op_it = model->operators.begin() + op_index; if (op_it->get()->type != OperatorType::kDynamicStitch) { - return false; + return ::tensorflow::Status::OK(); } auto* stitch_op = static_cast(op_it->get()); @@ -72,7 +75,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because indices input %s into " "%s is unexpected", LogName(*op), LogName(*stitch_op)); - return false; + return ::tensorflow::Status::OK(); } if (!indices_partition_op) { indices_partition_op = static_cast(op); @@ -83,7 +86,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because indices input %s into " "%s is from a different source op than others", LogName(*op), LogName(*stitch_op)); - return false; + return ::tensorflow::Status::OK(); } } } @@ -92,12 +95,12 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { // The data for the indices must be a constant range of the array shape. if (!IsConstantParameterArray(*model, indices_partition_op->inputs[0])) { AddMessageF("Skipping because indices partition data is non-constant"); - return false; + return ::tensorflow::Status::OK(); } auto& indices_data_array = model->GetArray(indices_partition_op->inputs[0]); if (indices_data_array.data_type == ArrayDataType::kNone) { // Yield until data types are propagated. 
- return false; + return ::tensorflow::Status::OK(); } CHECK(indices_data_array.data_type == ArrayDataType::kInt32) << "Indices partition inputs must be int32"; @@ -117,7 +120,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because data input %s into %s " "is unexpected", LogName(*op), LogName(*stitch_op)); - return false; + return ::tensorflow::Status::OK(); } gather_ops.push_back(static_cast(op)); } @@ -132,7 +135,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because data input %s into " "%s is unexpected", LogName(*op), LogName(*gather_op)); - return false; + return ::tensorflow::Status::OK(); } if (!data_partition_op) { data_partition_op = static_cast(op); @@ -143,7 +146,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because data input %s into " "%s is from a different source op than others", LogName(*op), LogName(*gather_op)); - return false; + return ::tensorflow::Status::OK(); } } } @@ -236,7 +239,8 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { DeleteOpAndArraysIfUnused(model, indices_partition_op); DeleteOpAndArraysIfUnused(model, data_partition_op); DeleteOpAndArraysIfUnused(model, stitch_op); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc index fedf4441e2..5ff39aa313 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc @@ -36,10 +36,12 @@ namespace toco { // slice_c = tf.matmul(slice_a, slice_b) // result_slices[bat] = slice_c // result = tf.stack(result_slices) -bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) { +::tensorflow::Status UnrollBatchMatMul::Run(Model* 
model, std::size_t op_index, + bool* modified) { + *modified = false; auto batch_op_it = model->operators.begin() + op_index; if (batch_op_it->get()->type != OperatorType::kBatchMatMul) { - return false; + return ::tensorflow::Status::OK(); } const auto* batch_op = static_cast(batch_op_it->get()); @@ -47,7 +49,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) { // We must have the shape of at least one input to know our batch size. const auto& input_array_a = model->GetArray(batch_op->inputs[0]); const auto& input_array_b = model->GetArray(batch_op->inputs[1]); - if (!input_array_a.has_shape() || !input_array_b.has_shape()) return false; + if (!input_array_a.has_shape() || !input_array_b.has_shape()) + return ::tensorflow::Status::OK(); // We only support the rank 3 case. If you are batching on rank > 3 you'll // have to figure that out. @@ -66,7 +69,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) { batch_op_it = matmul_op_it + 1; CHECK_EQ(batch_op_it->get(), batch_op); model->operators.erase(batch_op_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } CHECK_EQ(input_array_a.shape().dimensions_count(), 3) << "Input arrays must have rank 3"; @@ -167,7 +171,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) { CHECK(batch_op_it != model->operators.end()); CHECK(batch_op_it->get() == batch_op); model->operators.erase(batch_op_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco -- GitLab From 072fcb995a3fd658ee2461b59b159498c710513d Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Tue, 9 Oct 2018 11:54:20 -0700 Subject: [PATCH 131/411] [tf.data] NUMA-aware MapAndBatch dataset. 
PiperOrigin-RevId: 216395709 --- ...f_ExperimentalNumaMapAndBatchDataset.pbtxt | 58 + tensorflow/core/framework/model.h | 2 +- .../core/grappler/optimizers/data/BUILD | 35 + .../optimizers/data/graph_test_utils.cc | 16 + .../optimizers/data/graph_test_utils.h | 6 + .../map_and_batch_numa_aware_replacement.cc | 62 + .../map_and_batch_numa_aware_replacement.h | 48 + ...p_and_batch_numa_aware_replacement_test.cc | 112 ++ .../core/kernels/data/experimental/BUILD | 17 + .../numa_map_and_batch_dataset_op.cc | 1135 +++++++++++++++++ .../kernels/data/map_and_batch_dataset_op.cc | 38 +- .../core/ops/experimental_dataset_ops.cc | 26 + .../kernel_tests/map_and_batch_test.py | 280 +++- .../kernel_tests/optimization/BUILD | 2 + .../optimization/model_dataset_op_test.py | 11 +- .../optimization/optimize_dataset_op_test.py | 16 + .../kernel_tests/serialization/BUILD | 15 + ...ap_and_batch_dataset_serialization_test.py | 95 ++ tensorflow/python/data/experimental/ops/BUILD | 1 + tensorflow/python/data/ops/dataset_ops.py | 7 +- .../golden/v1/tensorflow.data.-options.pbtxt | 4 + .../golden/v2/tensorflow.data.-options.pbtxt | 4 + 22 files changed, 1909 insertions(+), 81 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc create mode 100644 tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc create mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt new file 
mode 100644 index 0000000000..243922d969 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt @@ -0,0 +1,58 @@ +op { + graph_op_name: "ExperimentalNumaMapAndBatchDataset" + visibility: HIDDEN + in_arg { + name: "input_dataset" + description: <{}}}); } +NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name, + StringPiece batch_size_node_name, + StringPiece num_parallel_calls_node_name, + StringPiece drop_remainder_node_name, + StringPiece function_name) { + return test::function::NDef( + name, "MapAndBatchDatasetV2", + {string(input_node_name), "", string(batch_size_node_name), + string(num_parallel_calls_node_name), string(drop_remainder_node_name)}, + {{"predicate", FunctionDefHelper::FunctionRef(string(function_name))}, + {"Targuments", {}}, + {"output_shapes", gtl::ArraySlice{}}, + {"output_types", gtl::ArraySlice{}}}); +} + } // end namespace graph_tests_utils } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h index ca0fde997d..f7891d5e1f 100644 --- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h +++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h @@ -29,6 +29,12 @@ NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name, NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name, StringPiece function_name = "IsZero"); +NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name, + StringPiece batch_size_node_name, + StringPiece num_parallel_calls_node_name, + StringPiece drop_remainder_node_name, + StringPiece function_name = "XTimesTwo"); + } // end namespace graph_tests_utils } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc 
b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc new file mode 100644 index 0000000000..452089eb67 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc @@ -0,0 +1,62 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h" + +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/mutable_graph_view.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" + +namespace tensorflow { +namespace grappler { +namespace { + +NodeDef MakeNumaAware(const NodeDef& node, MutableGraphView* graph) { + NodeDef numa_aware_node = node; + graph_utils::SetUniqueGraphNodeName("map_and_batch_numa_aware", + graph->GetGraph(), &numa_aware_node); + numa_aware_node.set_op("ExperimentalNumaMapAndBatchDataset"); + return numa_aware_node; +} + +} // namespace + +Status MapAndBatchNumaAwareReplacement::Optimize(Cluster* cluster, + const GrapplerItem& item, + GraphDef* output) { + *output = item.graph; + MutableGraphView 
graph(output); + std::set nodes_to_delete; + + for (const NodeDef& node : item.graph.node()) { + if (node.op() != "MapAndBatchDatasetV2") continue; + + auto* numa_node = graph.AddNode(MakeNumaAware(node, &graph)); + graph.ReplaceInput(node, *numa_node); + nodes_to_delete.insert(node.name()); + } + graph.DeleteNodes(nodes_to_delete); + return Status::OK(); +} + +REGISTER_GRAPH_OPTIMIZER_AS(MapAndBatchNumaAwareReplacement, + "map_and_batch_numa_aware_replacement"); + +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h new file mode 100644 index 0000000000..3b2acd288b --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h @@ -0,0 +1,48 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_ + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" + +namespace tensorflow { +namespace grappler { + +class MapAndBatchNumaAwareReplacement : public CustomGraphOptimizer { + public: + MapAndBatchNumaAwareReplacement() = default; + ~MapAndBatchNumaAwareReplacement() override = default; + + string name() const override { + return "map_and_batch_numa_aware_replacement"; + } + + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { + return Status::OK(); + } + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* output) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) override {} +}; + +} // namespace grappler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_ diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc new file mode 100644 index 0000000000..3c5c61d1c2 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc @@ -0,0 +1,112 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h" + +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +TEST(MapAndBatchNumaAwareReplacementTest, ReplaceSimple) { + using test::function::NDef; + GrapplerItem item; + item.graph = test::function::GDef( + { + NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}), + NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}), + NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}), + NDef("range", "RangeDataset", {"start", "stop", "step"}, {}), + NDef("batch_size", "Const", {}, {{"value", 3}, {"dtype", DT_INT32}}), + NDef("num_parallel_calls", "Const", {}, + {{"value", 5}, {"dtype", DT_INT32}}), + NDef("drop_remainder", "Const", {}, + {{"value", 0}, {"dtype", DT_BOOL}}), + graph_tests_utils::MakeMapAndBatchNode( + "map_and_batch", "range", "batch_size", "num_parallel_calls", + "drop_remainder"), + }, + // FunctionLib + { + test::function::XTimesTwo(), + }); + + MapAndBatchNumaAwareReplacement optimizer; + GraphDef 
output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map_and_batch", output)); + EXPECT_FALSE(graph_utils::ContainsNodeWithOp("MapAndBatchDatasetV2", output)); + EXPECT_TRUE(graph_utils::ContainsNodeWithOp( + "ExperimentalNumaMapAndBatchDataset", output)); +} + +TEST(MapAndBatchNumaAawareReplacementTest, ReplaceWithExtraChild) { + using test::function::NDef; + GrapplerItem item; + item.graph = test::function::GDef( + { + NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}), + NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}), + NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}), + NDef("range", "RangeDataset", {"start", "stop", "step"}, {}), + NDef("batch_size", "Const", {}, {{"value", 3}, {"dtype", DT_INT32}}), + NDef("num_parallel_calls", "Const", {}, + {{"value", 5}, {"dtype", DT_INT32}}), + NDef("drop_remainder", "Const", {}, + {{"value", 0}, {"dtype", DT_BOOL}}), + graph_tests_utils::MakeMapAndBatchNode( + "map_and_batch", "range", "batch_size", "num_parallel_calls", + "drop_remainder"), + NDef("cache", "CacheDataset", {"map_and_batch"}, {}), + }, + // FunctionLib + { + test::function::XTimesTwo(), + }); + + MapAndBatchNumaAwareReplacement optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map_and_batch", output)); + EXPECT_FALSE(graph_utils::ContainsNodeWithOp("MapAndBatchDatasetV2", output)); + EXPECT_TRUE(graph_utils::ContainsNodeWithOp( + "ExperimentalNumaMapAndBatchDataset", output)); + EXPECT_TRUE(graph_utils::ContainsNodeWithOp("CacheDataset", output)); + + int numa_map_and_batch_component_id = graph_utils::FindGraphNodeWithOp( + "ExperimentalNumaMapAndBatchDataset", output); + auto& numa_map_and_batch_component = + output.node(numa_map_and_batch_component_id); + EXPECT_EQ(numa_map_and_batch_component.input(0), "range"); + + int cache_id = 
graph_utils::FindGraphNodeWithOp("CacheDataset", output); + auto& cache_node = output.node(cache_id); + EXPECT_EQ(cache_node.input(0), numa_map_and_batch_component.name()); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index 43406db3ed..4cf5643bc0 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -102,6 +102,22 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "numa_map_and_batch_dataset_op", + srcs = ["numa_map_and_batch_dataset_op.cc"], + deps = [ + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:experimental_dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:inplace_ops", + "//tensorflow/core/kernels/data:captured_function", + "//tensorflow/core/kernels/data:dataset", + "@com_google_absl//absl/memory", + ], +) + tf_kernel_library( name = "unique_dataset_op", srcs = ["unique_dataset_op.cc"], @@ -132,6 +148,7 @@ tf_kernel_library( ":ignore_errors_dataset_op", ":indexed_dataset", ":lmdb_dataset_op", + ":numa_map_and_batch_dataset_op", ":prefetching_kernels", ":threadpool_dataset_op", ":unique_dataset_op", diff --git a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc new file mode 100644 index 0000000000..d83edb9667 --- /dev/null +++ b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc @@ -0,0 +1,1135 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#define EIGEN_USE_THREADS + +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/captured_function.h" +#include "tensorflow/core/kernels/data/dataset.h" +#include "tensorflow/core/kernels/inplace_ops_functor.h" +#include "tensorflow/core/lib/core/blocking_counter.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/numa.h" +#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/util/ptr_util.h" + +namespace tensorflow { +namespace data { +namespace { + +// kWindowSize is the fixed constant controlling the number of batch outputs +// each NumaWorkerBlock may be processing at a time. This is currently a +// constant and not user configurable to enable future performance optimizations +// in the implementation. +const int64 kWindowSize = 10; + +// Define a helper for more consistent logging. 
+#define WORKER_VLOG(verbose_level) \ + VLOG(verbose_level) << "WorkerThread (" << numa_node << ", " << thread_num \ + << "): " + +// See documentation in ../ops/dataset_ops.cc for a high-level +// description of the following op. + +class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel { + public: + explicit NumaMapAndBatchDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); + } + + protected: + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + int64 batch_size; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "batch_size", &batch_size)); + OP_REQUIRES( + ctx, batch_size > 0, + errors::InvalidArgument("batch_size must be greater than zero.")); + + int64 num_parallel_calls; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls", + &num_parallel_calls)); + OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune, + errors::InvalidArgument( + "num_parallel_calls must be greater than zero.")); + + bool drop_remainder; + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "drop_remainder", &drop_remainder)); + + std::unique_ptr captured_func; + OP_REQUIRES_OK( + ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + /* use_inter_op_parallelism = */ false, + &captured_func)); + + *output = new Dataset(ctx, input, batch_size, num_parallel_calls, + drop_remainder, output_types_, output_shapes_, func_, + std::move(captured_func)); + } + + private: + class Dataset : public DatasetBase { + public: + Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size, + int64 num_parallel_calls, bool drop_remainder, + const DataTypeVector& output_types, + const std::vector& output_shapes, + const NameAttrList& func, + std::unique_ptr captured_func) + : DatasetBase(DatasetContext(ctx)), 
+ input_(input), + batch_size_(batch_size), + num_parallel_calls_(num_parallel_calls), + drop_remainder_(drop_remainder), + output_types_(output_types), + output_shapes_(output_shapes), + func_(func), + captured_func_(std::move(captured_func)) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::NumaMapAndBatch")})); + } + + const DataTypeVector& output_dtypes() const override { + return output_types_; + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() const override { + return "NumaMapAndBatchDatasetOp::Dataset"; + } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name())); + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); + Node* batch_size_node; + TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size_node)); + Node* num_parallel_calls_node; + TF_RETURN_IF_ERROR( + b->AddScalar(num_parallel_calls_, &num_parallel_calls_node)); + Node* drop_remainder_node; + TF_RETURN_IF_ERROR(b->AddScalar(drop_remainder_, &drop_remainder_node)); + + DataTypeVector other_arguments_types; + other_arguments_types.reserve(captured_func_->captured_inputs().size()); + std::vector other_arguments; + other_arguments.reserve(captured_func_->captured_inputs().size()); + for (const Tensor& t : captured_func_->captured_inputs()) { + Node* node; + TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + other_arguments.emplace_back(node); + other_arguments_types.emplace_back(t.dtype()); + } + AttrValue f; + b->BuildAttrValue(func_, &f); + AttrValue other_arguments_types_attr; + b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); + + 
TF_RETURN_IF_ERROR(b->AddDataset( + this, + {std::make_pair(0, input_graph_node), + std::make_pair(2, batch_size_node), + std::make_pair(3, num_parallel_calls_node), + std::make_pair(4, drop_remainder_node)}, // Single tensor inputs. + {std::make_pair(1, other_arguments)}, // Tensor list inputs. + {std::make_pair("f", f), + std::make_pair("Targuments", other_arguments_types_attr)}, // Attrs + output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + mu_(std::make_shared()), + autotune_cond_var_(std::make_shared()), + num_parallel_calls_(std::make_shared( + params.dataset->num_parallel_calls_, mu_, autotune_cond_var_)) { + } + + ~Iterator() override { + mutex_lock l(*mu_); + cancelled_ = true; + VLOG(3) << "NumaMapAndBatchIterator::~Iterator: cancelling operations."; + for (size_t i = 0; i < workers_.size(); ++i) { + workers_[i]->manager.Cancel(); + } + VLOG(3) << "NumaMapAndBatchIterator::~Iterator: waiting for threads to " + "shut down."; + } + + Status Initialize(IteratorContext* ctx) override { + mutex_lock l(*mu_); + AddConstantParameter(ctx, "batch_size", dataset()->batch_size_); + if (num_parallel_calls_->value == kAutoTune) { + num_parallel_calls_->value = std::max(1, port::NUMANumNodes()); + AddTunableParameter(ctx, + /* name = */ "parallelism", + /* state = */ num_parallel_calls_, + /* min = */ num_parallel_calls_->value, + /* max = */ port::NumSchedulableCPUs()); + } else { + AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value); + } + TF_RETURN_IF_ERROR( + dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); + TF_RETURN_IF_ERROR(dataset()->captured_func_->Instantiate(ctx)); + return Status::OK(); + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + auto cleanup = gtl::MakeCleanup( + [] { VLOG(3) << "GetNextInternal call returning."; }); + 
NumaWorkerBlock* worker = nullptr; + { + mutex_lock l(*mu_); + VLOG(3) << "GetNextInternal call; current block: " << cur_block_; + if (global_end_of_input_) { + *end_of_sequence = true; + return Status::OK(); + } + TF_RETURN_IF_ERROR(EnsureBackgroundThreadsStarted(ctx)); + worker = workers_[cur_block_].get(); + cur_block_ = (cur_block_ + 1) % workers_.size(); + } + TF_RETURN_IF_ERROR(worker->manager.GetBatch( + ctx, dataset()->drop_remainder_, &global_end_of_input_, out_tensors, + end_of_sequence)); + return Status::OK(); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(*mu_); + for (size_t i = 0; i < workers_.size(); ++i) { + if (!workers_[i]->manager.Quiesce()) { + return errors::Cancelled( + "The iterator was deleted before it could reach a " + "checkpointable state."); + } + } + + TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("num_workers"), workers_.size())); + + for (size_t i = 0; i < workers_.size(); ++i) { + size_t index = (cur_block_ + i) % workers_.size(); + TF_RETURN_IF_ERROR(workers_[index]->manager.Save(writer, this, i)); + } + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(*mu_); + TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_)); + int64 num_workers = -1; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("num_workers"), &num_workers)); + // Note: num_workers can be 0 if the iterator wasn't started when + // first checkpointed. 
+ if (num_workers < 0) { + return errors::DataLoss( + "When restoring from checkpoint, we encountered a data " + "consistency error: num_workers has an invalid value: ", + num_workers); + } + if (port::NUMAEnabled()) { + int actual_numa_domains = port::NUMANumNodes(); + if (actual_numa_domains != num_workers && num_workers > 0) { + LOG(WARNING) << "# NUMA domains mismatch when restoring from " + "checkpoint: checkpoint has " + << num_workers + << " NUMA domains, while this host has: " + << actual_numa_domains << " NUMA domains."; + } + } + if (num_workers > 1 && !port::NUMAEnabled()) { + LOG(WARNING) << "NUMA is not enabled for this process, but restoring " + "a checkpoint that assumes " + << num_workers << " NUMA domains."; + } + workers_.resize(num_workers); + for (size_t i = 0; i < num_workers; ++i) { + workers_[i] = MakeUnique(this); + TF_RETURN_IF_ERROR( + workers_[i]->manager.Restore(ctx, reader, this, i)); + } + cur_block_ = 0; + return Status::OK(); + } + + private: + // NumaBlockManager manages all the state for a set of threads pinned to a + // single NUMA domain. + // + // The methods can be divided into 3 categories based on who should call + // them: + // + // (1) RunnerThread: WaitForInputSpace, PushInputs, SetEndOfInput. + // (2) WorkerThread: RetrieveInput, GetBatchTensors. + // RecordBatchEntryComplete + // (3) Client threads: GetBatch, Cancel, Save, Restore. + // + // Internally, we manage state in a circular buffer of size `kWindowSize`. + // There are 3 pointers into the circular buffer, and must maintain the + // following order: (1) next_input_batch_ (corresponding to the next input + // batch to be pulled from the input iterator), (2) next_input_ + // (corresponding to the batch the WorkerThreads should pull from for + // their next inputs), and (3) next_output_ corresponding to the next + // value to be consumed by the output iterator. + // + // Methods return errors::Cancelled if the iteration is cancelled before + // completing. 
+ // + // NumaBlockManager is thread safe. + class NumaBlockManager { + public: + explicit NumaBlockManager(Iterator* itr) : itr_(itr) {} + + // WaitForInputSpace blocks until there is space in the circular buffer + // to begin processing a new batch of elements. + // + // Returns true when there is space, false if the Iterator is cancelled. + bool WaitForInputSpace(IteratorContext* ctx) { + mutex_lock l(mu_); + + size_t next = (next_input_batch_ + 1) % kWindowSize; + DCHECK(next < kWindowSize) << next; + + // Wait for space in the circular buffer. + while (!cancelled_ && batches_[next].state != BatchState::kEmpty) { + VLOG(3) << "Waiting for input space; next: " << next + << ", next_output_: " << next_output_ + << ", next_input_batch_: " << next_input_batch_; + itr_->RecordStop(ctx); + runner_cond_var_.wait(l); + itr_->RecordStart(ctx); + } + if (cancelled_) { + VLOG(3) << "WaitForInputSpace cancelled."; + return false; + } + + DCHECK(batches_[next].state == BatchState::kEmpty); + + next_input_batch_ = next; + return true; + } + + // PushInputs sets the inputs for the next batch as retrieved from the + // input iterator. 
+ void PushInputs(const Status& status, + std::vector> inputs) { + mutex_lock l(mu_); + + DCHECK(next_input_ < kWindowSize) << next_input_; + DCHECK(batches_[next_input_batch_].state == BatchState::kEmpty); + DCHECK(batches_[next_input_batch_].next_input_to_process == 0) + << batches_[next_input_batch_].next_input_to_process; + DCHECK(batches_[next_input_batch_].status.ok()) + << batches_[next_input_batch_].status; + + batches_[next_input_batch_].inputs.swap(inputs); + batches_[next_input_batch_].state = BatchState::kInputsFilled; + batches_[next_input_batch_].status.Update(status); + if (batches_[next_input_batch_].status.ok()) { + worker_cond_var_.notify_all(); + } else { + client_cond_var_.notify_all(); + batches_[next_input_batch_].error_index = 0; + } + } + + // SetEndOfInput records the fact that we have reached the end of the + // input iterator, and that we should return end_of_sequence = true when + // we have exhaused all buffered batches. + void SetEndOfInput() { + mutex_lock l(mu_); + reached_eof_ = true; + worker_cond_var_.notify_all(); + client_cond_var_.notify_all(); + } + + // RetrieveInput gets the next input tuple to be mapped by a worker + // thread. + // + // Returns true if an input was retrieved, false if the iterator has + // been cancelled. + bool RetrieveInput(IteratorContext* ctx, std::vector* input, + uint64* index, size_t* sequence_number) { + mutex_lock l(mu_); + + // Wait for inputs to be ready. 
+ while (!cancelled_ && + batches_[next_input_].state != BatchState::kInputsFilled) { + itr_->RecordStop(ctx); + worker_cond_var_.wait(l); + itr_->RecordStart(ctx); + } + + if (cancelled_) { + return false; + } + + DCHECK(batches_[next_input_].next_input_to_process < + batches_[next_input_].inputs.size()) + << "next_input_: " << next_input_ << ", next_input_to_process: " + << batches_[next_input_].next_input_to_process + << ", inputs.size(): " << batches_[next_input_].inputs.size() + << ", state: " << static_cast(batches_[next_input_].state) + << ", this: " << this; + *index = batches_[next_input_].next_input_to_process; + *sequence_number = next_input_; + input->swap(batches_[next_input_] + .inputs[batches_[next_input_].next_input_to_process]); + // Increment pointers. + batches_[next_input_].next_input_to_process++; + + if (batches_[next_input_].next_input_to_process == + batches_[next_input_].inputs.size()) { + batches_[next_input_].state = BatchState::kAllMapsStarted; + next_input_ = (next_input_ + 1) % kWindowSize; + } + return true; + } + + // GetBatchTensors returns a pointer to the output batch tensors for the + // worker thread to copy into. + // + // allocate_output is a function taking a batch size, and a pointer to + // the output tuple of Tensors to allocate them. The allocate_output + // function is called at most once per output batch. 
+ std::vector* GetBatchTensors( + size_t sequence_number, + std::function*)> allocate_output) { + mutex_lock l(mu_); + DCHECK(sequence_number < kWindowSize) << sequence_number; + DCHECK(batches_[sequence_number].state == BatchState::kInputsFilled || + batches_[sequence_number].state == BatchState::kAllMapsStarted) + << sequence_number; + + if (batches_[sequence_number].outputs.empty()) { + allocate_output(batches_[sequence_number].inputs.size(), + &batches_[sequence_number].outputs); + } + return &batches_[sequence_number].outputs; + } + + // RecordBatchEntryComplete records an element of the batch has finished + // copying into the output tensors. + void RecordBatchEntryComplete(size_t sequence_number, uint64 index, + Status s) { + mutex_lock l(mu_); + DCHECK(sequence_number < kWindowSize) << sequence_number; + DCHECK(batches_[sequence_number].state == BatchState::kInputsFilled || + batches_[sequence_number].state == BatchState::kAllMapsStarted) + << sequence_number; + + batches_[sequence_number].num_outputs_complete++; + if (!s.ok() && batches_[sequence_number].error_index > index) { + batches_[sequence_number].status = s; + batches_[sequence_number].error_index = index; + } + + if (batches_[sequence_number].num_outputs_complete == + batches_[sequence_number].inputs.size()) { + DCHECK(batches_[sequence_number].state == + BatchState::kAllMapsStarted); + batches_[sequence_number].state = BatchState::kOutputsComplete; + batches_[sequence_number].inputs.clear(); // Eagerly save memory. + batches_[sequence_number].inputs.shrink_to_fit(); + client_cond_var_.notify_all(); + } + } + + // GetBatch retrieves the next output batch tensors. + Status GetBatch(IteratorContext* ctx, bool drop_remainder, + bool* global_eof, std::vector* out_tensor, + bool* end_of_sequence) { + mutex_lock l(mu_); + // Wait until one of 3 conditions occurs: + // (1) we're cancelled. + // (2) the state becomes kOutputsComplete + // (3) state is empty && reached_eof. 
+ while (!cancelled_ && + batches_[next_output_].state != BatchState::kOutputsComplete && + !(reached_eof_ && + batches_[next_output_].state == BatchState::kEmpty)) { + VLOG(3) << "Waiting in GetBatch."; + itr_->RecordStop(ctx); + client_cond_var_.wait(l); + itr_->RecordStart(ctx); + } + + if (cancelled_) { + return errors::Cancelled( + "Cancelled in NumaMapAndBatch::GetNext call."); + } + + if (reached_eof_ && + batches_[next_output_].state == BatchState::kEmpty) { + VLOG(4) << "GetBatch returning end of sequence."; + *end_of_sequence = true; + *global_eof = true; + return Status::OK(); + } + + VLOG(3) << "Returning output index: " << next_output_ + << ", this: " << this; + + *end_of_sequence = false; + Status s = batches_[next_output_].status; + if (s.ok()) { + out_tensor->swap(batches_[next_output_].outputs); + } + // Handle early termination. + if (errors::IsOutOfRange(s)) { + *global_eof = true; + s = Status::OK(); + if (drop_remainder || batches_[next_output_].error_index == 0) { + *end_of_sequence = true; + } else { + std::vector true_outputs; + for (size_t i = 0; i < batches_[next_output_].outputs.size(); + ++i) { + TensorShape component_shape( + batches_[next_output_].outputs[i].shape()); + component_shape.set_dim(0, batches_[next_output_].error_index); + AllocatorAttributes attr; + attr.set_gpu_compatible(true); + Tensor component(ctx->allocator(attr), + batches_[next_output_].outputs[i].dtype(), + component_shape); + TF_RETURN_IF_ERROR(CopyPartialBatch( + &component, batches_[next_output_].outputs[i], + batches_[next_output_].error_index)); + true_outputs.emplace_back(std::move(component)); + } + out_tensor->swap(true_outputs); + } + } + + batches_[next_output_].Reset(); + next_output_ = (next_output_ + 1) % kWindowSize; + runner_cond_var_.notify_all(); + + return s; + } + + void Cancel() { + mutex_lock l(mu_); + VLOG(3) << "Cancelling NUMA block."; + cancelled_ = true; + runner_cond_var_.notify_all(); + worker_cond_var_.notify_all(); + 
client_cond_var_.notify_all(); + } + + // Waits until all the worker threads have completed their work and all + // internal state has reached a "safe-point" where we can safely + // checkpoint. + // + // Returns true if completed successfully, false if cancelled while + // waiting. + bool Quiesce() { + mutex_lock l(mu_); + VLOG(3) << "Waiting until the operations have quiesced."; + while (!cancelled_ && !AllMapOperationsFinished()) { + client_cond_var_.wait(l); + } + if (cancelled_) { + return false; + } + return true; + } + + Status Save(IteratorStateWriter* writer, Iterator* itr, size_t index) { + mutex_lock l(mu_); + string prefix = itr->full_name(strings::StrCat("numa_block_", index)); + if (reached_eof_) { + TF_RETURN_IF_ERROR(writer->WriteScalar( + strings::StrCat(prefix, "_end_of_input"), "")); + } + for (size_t i = 0; i < kWindowSize; ++i) { + size_t index = (next_output_ + i) % kWindowSize; + if (batches_[index].state == BatchState::kEmpty) { + break; + } + string batch_prefix = strings::StrCat(prefix, "_batch_", i); + TF_RETURN_IF_ERROR(writer->WriteScalar( + strings::StrCat(batch_prefix, "_code"), + static_cast(batches_[index].status.code()))); + if (!batches_[index].status.ok()) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(strings::StrCat(batch_prefix, "_msg"), + batches_[index].status.error_message())); + TF_RETURN_IF_ERROR(writer->WriteScalar( + strings::StrCat(batch_prefix, "_error_index"), + batches_[index].error_index)); + } + + TF_RETURN_IF_ERROR(writer->WriteScalar( + strings::StrCat(batch_prefix, "_output_size"), + batches_[index].outputs.size())); + for (size_t j = 0; j < batches_[index].outputs.size(); ++j) { + string tensor_prefix = + strings::StrCat(batch_prefix, "_output_", j); + if (!batches_[index].status.ok()) { + DCHECK(batches_[index].error_index >= 0 && + batches_[index].error_index < + itr_->dataset()->batch_size_); + // If the batch is not full, we only store the first + // `error_index` values. 
The rest of the batch tensor might not + // be initialized, and accessing that will raise msan errors. + TF_RETURN_IF_ERROR(writer->WriteTensor( + tensor_prefix, batches_[index].outputs[j].Slice( + 0, batches_[index].error_index))); + } else { + TF_RETURN_IF_ERROR(writer->WriteTensor( + tensor_prefix, batches_[index].outputs[j])); + } + } + } + return Status::OK(); + } + + Status Restore(IteratorContext* ctx, IteratorStateReader* reader, + Iterator* itr, size_t index) { + mutex_lock l(mu_); + if (reached_eof_) { + return errors::FailedPrecondition( + "Already reached the end of the sequence."); + } + string prefix = itr->full_name(strings::StrCat("numa_block_", index)); + reached_eof_ = + reader->Contains(strings::StrCat(prefix, "_end_of_input")); + for (size_t i = 0; i < kWindowSize; ++i) { + string batch_prefix = strings::StrCat(prefix, "_batch_", i); + if (!reader->Contains(strings::StrCat(batch_prefix, "_code"))) { + break; + } + Batch batch; + batch.state = BatchState::kOutputsComplete; + int64 code_int; + TF_RETURN_IF_ERROR(reader->ReadScalar( + strings::StrCat(batch_prefix, "_code"), &code_int)); + error::Code code = static_cast(code_int); + if (code != error::Code::OK) { + string error_message; + TF_RETURN_IF_ERROR(reader->ReadScalar( + strings::StrCat(batch_prefix, "_msg"), &error_message)); + batch.status = Status(code, error_message); + int64 error_index_int = -1; + TF_RETURN_IF_ERROR(reader->ReadScalar( + strings::StrCat(batch_prefix, "_error_index"), + &error_index_int)); + if (error_index_int < 0 || + error_index_int > itr->dataset()->batch_size_) { + return errors::FailedPrecondition( + "Error index out of bounds when restoring from checkpoint; " + "error index: ", + error_index_int); + } + batch.error_index = static_cast(error_index_int); + } + int64 output_size = -1; + TF_RETURN_IF_ERROR(reader->ReadScalar( + strings::StrCat(batch_prefix, "_output_size"), &output_size)); + batch.outputs.reserve(output_size); + for (size_t j = 0; j < output_size; 
++j) { + string tensor_name = strings::StrCat(batch_prefix, "_output_", j); + Tensor t; + TF_RETURN_IF_ERROR(reader->ReadTensor(tensor_name, &t)); + batch.outputs.emplace_back(std::move(t)); + } + batches_[i] = std::move(batch); + } + return Status::OK(); + } + + private: + bool AllMapOperationsFinished() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + for (size_t i = 0; i < kWindowSize; ++i) { + if (batches_[i].state == BatchState::kInputsFilled || + batches_[i].state == BatchState::kAllMapsStarted) { + return false; + } + if (batches_[i].state != BatchState::kOutputsComplete && + !reached_eof_) { + return false; + } + } + return true; + } + + // Batches begin in the `kEmpty` state. Once the RunnerThread has + // filled the `inputs` to a `Batch`, it transitions to the + // `kInputsFilled` state. At this point, the Worker threads run the map + // function and copy the outputs appropriately. Once all worker threads + // have started, it transitions to `kAllMapsStarted`. After the outputs + // are complete, the GetNext call can consume the outputs, and return + // the batch to the kEmpty state. + enum class BatchState { + kEmpty, + kInputsFilled, + kAllMapsStarted, + kOutputsComplete, + }; + + // Batch captures all the state of an output batch as it progresses + // through the machinery. Once the RunnerThread fills inputs, it + // transitions to `kInputsFilled`. At this point, the worker threads can + // work on it, incrementing outputs_complete for every element of the + // input set that is copied into the output Tensors. Once all the input + // tuples have been processed (i.e. num_outputs_complete == + // inputs.size()), it transitions to the `kOutputsComplete` stage, where + // it is ready to be returned by a `GetBatch` call (called from + // `GetNextInternal`). + struct Batch { + BatchState state; + // Aggregates the Status of the input iterator's GetNext + // calls, in addition to the Status of the map function invocations. 
+ // + // In the case where multiple non-OK statuses are encountered, we + // return the first one encountered. + Status status; + // In order to return the correct error status, we keep track of the + // error_index. + size_t error_index; + // The batch_size input tuples (or fewer in the case of the last + // batch). + // TODO(saeta): Avoid re-allocating vectors all the time! + std::vector> inputs; + std::vector outputs; + size_t next_input_to_process; + size_t num_outputs_complete; + + Batch() { Reset(); } + + // Resets the Batch state (e.g. after consuming the outputs). + void Reset() { + state = BatchState::kEmpty; + status = Status::OK(); + inputs.clear(); + inputs.shrink_to_fit(); + outputs.clear(); + outputs.shrink_to_fit(); + next_input_to_process = 0; + num_outputs_complete = 0; + error_index = -1; + } + }; + + Iterator* itr_; // Not owned. + mutex mu_; + Batch batches_[kWindowSize] GUARDED_BY(mu_); + size_t next_input_batch_ GUARDED_BY(mu_) = -1; + size_t next_input_ GUARDED_BY(mu_) = 0; + size_t next_output_ GUARDED_BY(mu_) = 0; + bool cancelled_ GUARDED_BY(mu_) = false; + bool reached_eof_ GUARDED_BY(mu_) = false; + + // The runner thread waits on this condition variable for space to be + // available. When the client thread takes a value out of the circular + // buffer, it notifies this condition variable that space is now + // available. + condition_variable runner_cond_var_ GUARDED_BY(mu_); + // The worker threads wait on this condition variable for available + // inputs. When the runner thread makes new inputs available, it + // notifies this condition variable. + condition_variable worker_cond_var_ GUARDED_BY(mu_); + // The client threads wait on this condition variable for avaiable + // batched outputs. When worker threads complete a batch, they notify + // this condition variable. 
+ condition_variable client_cond_var_ GUARDED_BY(mu_); + }; + // Mark NumaBlockManager as a friend of Iterator in order to call + // protected Iterator methods during checkpointing. + friend NumaBlockManager; + + struct NumaWorkerBlock { + NumaBlockManager manager; + // TODO(saeta): Migrate to BackgroundWorker. + std::vector> threads; + + explicit NumaWorkerBlock(Iterator* itr) : manager(itr) {} + }; + + static void CustomNumaWorkerBlockDeleter(NumaWorkerBlock* ptr) { + ptr->~NumaWorkerBlock(); + port::NUMAFree(ptr, sizeof(NumaWorkerBlock)); + } + static void DefaultNumaWorkerBlockDeleter(NumaWorkerBlock* ptr) { + delete ptr; + } + + static Status CopyPartialBatch(Tensor* output, const Tensor& value, + int64 num_elements) { + switch (value.dtype()) { +#define HANDLE_TYPE(type) \ + case DataTypeToEnum::value: { \ + auto output_t = output->flat_outer_dims(); \ + auto value_t = value.flat_outer_dims(); \ + for (size_t i = 0; i < num_elements; i++) { \ + output_t.template chip<0>(i) = value_t.template chip<0>(i); \ + } \ + return Status::OK(); \ + } + TF_CALL_DATASET_TYPES(HANDLE_TYPE); +#undef HANDLE_TYPE + default: + return errors::InvalidArgument("Unsupported data type: ", + DataTypeString(value.dtype())); + } + return Status::OK(); + } + + Status EnsureBackgroundThreadsStarted(IteratorContext* ctx) + EXCLUSIVE_LOCKS_REQUIRED(*mu_) { + if (curr_num_parallel_calls_ >= num_parallel_calls_->value) { + // All necessary threads have been started. + curr_num_parallel_calls_ = num_parallel_calls_->value; + return Status::OK(); + } + + VLOG(4) << "Starting workers"; + bool numa_enabled = port::NUMAEnabled(); + + if (!numa_enabled) { + LOG(INFO) << "NUMA not enabled on this host."; + } + + int num_numa_nodes = port::NUMANumNodes(); + if (num_numa_nodes < 1) { + return errors::Internal("The number of NUMA nodes is invalid: ", + num_numa_nodes); + } + + // Only resize when empty to support restoring from checkpoints. 
+ if (workers_.empty()) { + VLOG(3) << "# NUMA Nodes: " << num_numa_nodes + << ", # Parallel Calls: " << num_parallel_calls_->value; + workers_.resize(num_numa_nodes); + } else { + num_numa_nodes = workers_.size(); + } + + // Round up num_parallel_calls, with a minimum of 1. + const size_t num_threads_per_block = + std::max(1LL, (num_parallel_calls_->value + num_numa_nodes - 1) / + num_numa_nodes); + + VLOG(3) << "Starting " << num_threads_per_block * num_numa_nodes + << " worker threads, with " << num_threads_per_block + << " threads per block."; + + // Only allocate new_ctx if required. + std::shared_ptr new_ctx; + + for (int i = 0; i < num_numa_nodes; ++i) { + if (!workers_[i]) { + if (numa_enabled) { + // Allocate in appropriate NUMA domain. + // 4k page align. + void* ptr = port::NUMAMalloc(i, sizeof(NumaWorkerBlock), 0); + if (ptr != nullptr) { + NumaWorkerBlock* block = new (ptr) NumaWorkerBlock(this); + workers_[i] = + std::unique_ptr>( + block, CustomNumaWorkerBlockDeleter); + } else { + LOG(ERROR) << "Could not NUMA-allocate worker block: " << i; + } + } + // If the NUMA allocation fails, or NUMA is not enabled. + if (!workers_[i]) { + workers_[i] = + std::unique_ptr>( + new NumaWorkerBlock(this), DefaultNumaWorkerBlockDeleter); + } + } + // Be sure to start threads if num_parallel_calls_ has changed. 
+ for (size_t j = workers_[i]->threads.size(); + j < num_threads_per_block; ++j) { + VLOG(3) << "Starting worker " << i << ", " << j; + if (!new_ctx) { + new_ctx = std::make_shared(*ctx); + } + workers_[i]->threads.emplace_back(ctx->env()->StartThread( + {}, + strings::StrCat("numa_map_and_batch_block_", i, "_thread_", j), + [this, new_ctx, i, j]() { WorkerThread(new_ctx, i, j); })); + VLOG(3) << "Worker " << i << ", " << j << " successfully started."; + } + } + if (!runner_thread_) { + if (!new_ctx) { + new_ctx = std::make_shared(*ctx); + } + runner_thread_.reset(ctx->env()->StartThread( + {}, "numa_map_runner_thread", + [this, new_ctx] { RunnerThread(new_ctx); })); + } + VLOG(3) << "All workers & runner thread started."; + return Status::OK(); + } + + void AllocateOutput(IteratorContext* ctx, size_t batch_size, + const std::vector& map_fn_outputs, + std::vector* batch_outputs) { + DCHECK(dataset()->output_dtypes().size() == + dataset()->output_shapes().size()); + DCHECK(map_fn_outputs.size() == dataset()->output_dtypes().size()); + for (size_t i = 0; i < dataset()->output_dtypes().size(); ++i) { + TensorShape component_shape({static_cast(batch_size)}); + component_shape.AppendShape(map_fn_outputs.at(i).shape()); + AllocatorAttributes attr; + attr.set_gpu_compatible(true); + Tensor component(ctx->allocator(attr), map_fn_outputs.at(i).dtype(), + component_shape); + batch_outputs->emplace_back(std::move(component)); + } + } + + void RunnerThread(std::shared_ptr ctx) + LOCKS_EXCLUDED(mu_) { + RecordStart(ctx.get()); + auto cleanup = gtl::MakeCleanup([this, &ctx] { + // Set end of input on all the managers in order to clean up in an + // orderly fashion. 
+ VLOG(3) << "Setting End of Input on workers_[*]->manager"; + for (size_t i = 0; i < workers_.size(); ++i) { + workers_[i]->manager.SetEndOfInput(); + } + RecordStop(ctx.get()); + }); + + const size_t num_blocks = workers_.size(); + + while (true) { + for (size_t block = 0; block < num_blocks; ++block) { + VLOG(4) << "RunnerThread waiting for input space in block: " + << block; + if (TF_PREDICT_FALSE( + !workers_[block]->manager.WaitForInputSpace(ctx.get()))) { + VLOG(3) << "RunnerThread exiting due to cancellation."; + return; + } + VLOG(4) << "RunnerThread has space; pulling on upstream for block " + << block; + + Status s; + std::vector> inputs; + bool end_of_sequence = false; + for (size_t i = 0; i < dataset()->batch_size_; ++i) { + std::vector tuple; + s.Update( + input_impl_->GetNext(ctx.get(), &tuple, &end_of_sequence)); + if (!s.ok()) { + break; + } + if (end_of_sequence) { + VLOG(4) << "Runner thread encountered end of sequence."; + if (dataset()->drop_remainder_) { + return; + } + break; + } + inputs.push_back(std::move(tuple)); + } + + VLOG(4) << "Moving inputs to block " << block + << ", which has size: " << inputs.size(); + if (!s.ok() || !inputs.empty()) { + workers_[block]->manager.PushInputs(s, std::move(inputs)); + VLOG(4) << "Inputs moved into block " << block; + } + if (end_of_sequence) { + return; + } + } + } + } + + void WorkerThread(std::shared_ptr ctx, + const int numa_node, const int thread_num) { + RecordStart(ctx.get()); + WORKER_VLOG(3) << "started."; + auto stop_cleanup = + gtl::MakeCleanup([this, numa_node, thread_num, &ctx]() { + RecordStop(ctx.get()); + WORKER_VLOG(3) << "exiting."; + }); + + NumaWorkerBlock* block = workers_[numa_node].get(); + port::NUMASetThreadNodeAffinity(numa_node); + const int num_numa_nodes = port::NUMANumNodes(); + const int minimum_num_parallel_calls = thread_num * num_numa_nodes; + + while (true) { + // Put threads to sleep based on autotuner. 
+ { + mutex_lock l(*mu_); + while (minimum_num_parallel_calls >= num_parallel_calls_->value && + !cancelled_) { + RecordStop(ctx.get()); + autotune_cond_var_->wait(l); + RecordStart(ctx.get()); + } + if (cancelled_) { + return; + } + } + + std::vector input; + uint64 index = 0; + size_t sequence_number = 0; + WORKER_VLOG(4) << "retrieving input."; + { + tracing::ScopedActivity trace( + "NumaMapAndBatch::Iterator::Worker::RetrieveInput"); + if (!block->manager.RetrieveInput(ctx.get(), &input, &index, + &sequence_number)) { + return; + } + } + + WORKER_VLOG(4) << "retrieved input; index: " << index + << ", sequence_number: " << sequence_number; + + std::vector return_values; + Status s; + { + tracing::ScopedActivity trace( + "NumaMapAndBatch::Iterator::Worker::FunctionExecution"); + s = dataset()->captured_func_->Run(ctx.get(), std::move(input), + &return_values); + } + WORKER_VLOG(4) << "ran function for index: " << index + << ", sequence_number: " << sequence_number; + + if (s.ok()) { + std::vector* output = block->manager.GetBatchTensors( + sequence_number, + [this, ctx, &return_values](size_t batch_size, + std::vector* output) { + AllocateOutput(ctx.get(), batch_size, return_values, output); + }); + WORKER_VLOG(4) << "copying tensors to batch output."; + { + tracing::ScopedActivity trace( + "NumaMapAndBatch::Iterator::Worker::BatchCopy"); + for (size_t i = 0; i < return_values.size() && s.ok(); ++i) { + Tensor& tensor = return_values.at(i); + Tensor* batch = &output->at(i); + if (tensor.NumElements() != + (batch->NumElements() / batch->dim_size(0))) { + s.Update(errors::InvalidArgument( + "Cannot add tensor to the batch: number of elements does " + "not match. 
Shapes are: [tensor]: ", + tensor.shape().DebugString(), + ", [batch]: ", batch->shape().DebugString())); + break; + } + s.Update(batch_util::CopyElementToSlice(std::move(tensor), + batch, index)); + } + } + } + + block->manager.RecordBatchEntryComplete(sequence_number, index, s); + WORKER_VLOG(4) << "finished index: " << index + << ", sequence_number: " << sequence_number; + } + } + + // mu_ protects shared internal state and is used to coordinate between + // the auto-tuner, client threads, worker threads, and the runner thread. + const std::shared_ptr mu_; + const std::shared_ptr autotune_cond_var_; + // The maximum number of parallel calls (can be auto-tuned). + const std::shared_ptr num_parallel_calls_; + + // Caches the last-seen value of num_parallel_calls_->value to + // short-circuit starting workers. + int64 curr_num_parallel_calls_ GUARDED_BY(*mu_) = 0; + + std::unique_ptr input_impl_; + int64 cur_block_ GUARDED_BY(*mu_) = 0; + bool global_end_of_input_ GUARDED_BY(*mu_) = false; + bool cancelled_ GUARDED_BY(*mu_) = false; + std::vector>> + workers_; // Const after initialization. 
+ std::unique_ptr runner_thread_ GUARDED_BY(*mu_); + }; + + const DatasetBase* const input_; + const int64 batch_size_; + const int64 num_parallel_calls_; + const bool drop_remainder_; + const DataTypeVector output_types_; + const std::vector output_shapes_; + const NameAttrList func_; + const std::unique_ptr captured_func_; + }; + + DataTypeVector output_types_; + std::vector output_shapes_; + NameAttrList func_; +}; + +REGISTER_KERNEL_BUILDER( + Name("ExperimentalNumaMapAndBatchDataset").Device(DEVICE_CPU), + NumaMapAndBatchDatasetOp); + +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index f45a239793..bae56828dc 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -324,6 +324,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { } private: + // BatchResult encapsulates the output batch, as well as anciliary + // metadata required to execute the fused map-and-batch operation. struct BatchResult { explicit BatchResult(int64 batch_size) { end_of_input = false; @@ -331,11 +333,23 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { num_elements = 0; output_allocated = false; status = Status::OK(); + status_offset = -1; } - void UpdateStatus(const Status& s) { - mutex_lock l(mu); - status.Update(s); + // UpdateStatus updates the batch's aggregate Status. + // + // In order to ensure that exactly the first non-OK status is returned + // (required to make the behavior is observably identical to a + // sequential execution of map followed by batch), we must also keep + // track of the offset into the batch that produced `s`. 
+ void UpdateStatus(const Status& s, int64 offset) { + if (TF_PREDICT_FALSE(!s.ok())) { + mutex_lock l(mu); + if (status.ok() || offset < status_offset) { + status = s; + status_offset = offset; + } + } } mutex mu; @@ -344,6 +358,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::vector output; bool output_allocated GUARDED_BY(mu); Status status GUARDED_BY(mu); + int64 status_offset GUARDED_BY(mu); // Counts the number of outstanding calls for this batch. int64 num_calls; // access guarded by owner's mutex }; @@ -379,7 +394,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::shared_ptr> return_values = std::make_shared>(); auto done = [this, ctx, result, return_values, offset](Status status) { - result->UpdateStatus(status); + result->UpdateStatus(status, offset); if (status.ok()) { EnsureOutputAllocated(ctx, result, return_values); for (size_t i = 0; i < return_values->size(); ++i) { @@ -389,11 +404,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { (batch->NumElements() / batch->dim_size(0))) { TensorShape batch_shape = batch->shape(); batch_shape.RemoveDim(0); - result->UpdateStatus(errors::InvalidArgument( - "Cannot add tensor to the batch: number of elements does " - "not match. Shapes are: [tensor]: ", - tensor.shape().DebugString(), - ", [batch]: ", batch_shape.DebugString())); + result->UpdateStatus( + errors::InvalidArgument( + "Cannot add tensor to the batch: number of elements " + "does " + "not match. 
Shapes are: [tensor]: ", + tensor.shape().DebugString(), + ", [batch]: ", batch_shape.DebugString()), + offset); break; } // TODO(mrry): Add a version of DoParallelConcat that allows us to @@ -402,7 +420,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { Status copy_status = ::tensorflow::functor::DoParallelConcat( *dataset()->device_, tensor, offset, batch); if (!copy_status.ok()) { - result->UpdateStatus(copy_status); + result->UpdateStatus(copy_status, offset); break; } } diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc index f6bd5dce26..bbbecc50f8 100644 --- a/tensorflow/core/ops/experimental_dataset_ops.cc +++ b/tensorflow/core/ops/experimental_dataset_ops.cc @@ -138,6 +138,32 @@ REGISTER_OP("ExperimentalAssertNextDataset") return shape_inference::ScalarShape(c); }); +REGISTER_OP("ExperimentalNumaMapAndBatchDataset") + .Input("input_dataset: variant") + .Input("other_arguments: Targuments") + .Input("batch_size: int64") + .Input("num_parallel_calls: int64") + .Input("drop_remainder: bool") + .Output("handle: variant") + .Attr("f: func") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Use index from the end to retrieve the Input shapes, + // so that to avoid guessing the length of "other_arguments". + // batch_size, num_parallel_batches, and drop_remainder are 0-D scalars. 
+ shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR( + c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); + TF_RETURN_IF_ERROR( + c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); + TF_RETURN_IF_ERROR( + c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); + + return shape_inference::ScalarShape(c); + }); + REGISTER_OP("ExperimentalLMDBDataset") .Input("filenames: string") .Output("handle: variant") diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py index d444c4082e..5ead6d1c75 100644 --- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import script_ops from tensorflow.python.platform import test @@ -38,12 +39,17 @@ from tensorflow.python.platform import test class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): @parameterized.named_parameters( - ("Default", None, None), - ("SequentialCalls", 1, None), - ("ParallelCalls", 2, None), - ("ParallelBatches", None, 10), + ("Default", None, None, False), + ("SequentialCalls", 1, None, False), + ("ParallelCalls", 2, None, False), + ("ParallelBatches", None, 10, False), + ("DefaultNUMA", None, None, True), + ("SequentialCallsNUMA", 1, None, True), + ("ParallelCallsNUMA", 2, None, True), + ("ParallelBatchesNUMA", None, 10, True), ) - def testMapAndBatch(self, num_parallel_calls, num_parallel_batches): + def testMapAndBatch(self, num_parallel_calls, num_parallel_batches, + numa_aware): """Test a dataset that maps a TF function across its input 
elements.""" # The pipeline is TensorSliceDataset -> # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size). @@ -57,14 +63,20 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = ( + dataset = ( dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply( batching.map_and_batch( map_func=_map_fn, batch_size=batch_size, num_parallel_calls=num_parallel_calls, - num_parallel_batches=num_parallel_batches)) - .make_initializable_iterator()) + num_parallel_batches=num_parallel_batches))) + + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + + iterator = dataset.make_initializable_iterator() init_op = iterator.initializer get_next = iterator.get_next() @@ -115,16 +127,25 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): sess.run(init_op, feed_dict={count: 14, batch_size: 0}) @parameterized.named_parameters( - ("Even", False), - ("Uneven", True), + ("Even", False, False), + ("Uneven", True, False), + ("EvenNUMA", False, True), + ("UnevenNUMA", True, True), ) - def testMapAndBatchPartialBatch(self, drop_remainder): - iterator = ( + def testMapAndBatchPartialBatch(self, drop_remainder, numa_aware): + dataset = ( dataset_ops.Dataset.range(10).apply( batching.map_and_batch( lambda x: array_ops.reshape(x * x, [1]), batch_size=4, - drop_remainder=drop_remainder)).make_one_shot_iterator()) + drop_remainder=drop_remainder))) + + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_one_shot_iterator() + if drop_remainder: self.assertEqual([4, 1], iterator.output_shapes.as_list()) else: @@ -138,11 +159,21 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): with 
self.assertRaises(errors.OutOfRangeError): sess.run(next_element) - def testMapAndBatchYieldsPartialBatch(self): - iterator = (dataset_ops.Dataset.range(10) - .apply(batching.map_and_batch( - lambda x: array_ops.reshape(x * x, [1]), 4)) - .make_one_shot_iterator()) + @parameterized.named_parameters( + ("Normal", False), + ("NUMA", True), + ) + def testMapAndBatchYieldsPartialBatch(self, numa_aware): + dataset = ( + dataset_ops.Dataset.range(10).apply( + batching.map_and_batch(lambda x: array_ops.reshape(x * x, [1]), 4))) + + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + + iterator = dataset.make_one_shot_iterator() self.assertEqual([None, 1], iterator.output_shapes.as_list()) next_element = iterator.get_next() with self.cached_session() as sess: @@ -152,10 +183,19 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) - def testMapAndBatchParallelGetNext(self): - iterator = (dataset_ops.Dataset.range(50000) - .apply(batching.map_and_batch(lambda x: x, batch_size=100)) - .make_one_shot_iterator()) + @parameterized.named_parameters( + ("Normal", False), + ("NUMA", True), + ) + def testMapAndBatchParallelGetNext(self, numa_aware): + dataset = dataset_ops.Dataset.range(50000).apply( + batching.map_and_batch(lambda x: x, batch_size=100)) + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_one_shot_iterator() + elements = [] for _ in range(100): elements.append(iterator.get_next()) @@ -165,17 +205,26 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): got.sort(key=lambda x: x[0]) expected = [] for j in range(100): - expected.append(range(i*10000+j*100, i*10000+(j+1)*100)) + expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100)) 
self.assertAllEqual(got, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(elements) - def testMapAndBatchParallelGetNextDropRemainder(self): - iterator = ( - dataset_ops.Dataset.range(49999).apply( - batching.map_and_batch( - lambda x: x, batch_size=100, drop_remainder=True)) - .make_one_shot_iterator()) + @parameterized.named_parameters( + ("Normal", False), + ("NUMA", True), + ) + def testMapAndBatchParallelGetNextDropRemainder(self, numa_aware): + dataset = dataset_ops.Dataset.range(49999).apply( + batching.map_and_batch( + lambda x: x, batch_size=100, drop_remainder=True)) + + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_one_shot_iterator() + elements = [] for _ in range(100): elements.append(iterator.get_next()) @@ -185,19 +234,29 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): got.sort(key=lambda x: x[0]) expected = [] for j in range(100): - expected.append(range(i*10000+j*100, i*10000+(j+1)*100)) + expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100)) self.assertAllEqual(got, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(elements) - def testMapAndBatchSparse(self): + @parameterized.named_parameters( + ("Normal", False), + ("NUMA", True), + ) + def testMapAndBatchSparse(self, numa_aware): def _sparse(i): return sparse_tensor.SparseTensorValue( indices=[[0]], values=(i * [1]), dense_shape=[1]) - iterator = dataset_ops.Dataset.range(10).apply( - batching.map_and_batch(_sparse, 5)).make_initializable_iterator() + dataset = dataset_ops.Dataset.range(10).apply( + batching.map_and_batch(_sparse, 5)) + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer get_next = iterator.get_next() @@ -214,21 +273,33 @@ class 
MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testMapAndBatchFails(self): + @parameterized.named_parameters( + ("Normal", False), + ("NUMA", True), + ) + def testMapAndBatchFails(self, numa_aware): """Test a dataset that maps a TF function across its input elements.""" dataset = dataset_ops.Dataset.from_tensors( array_ops.check_numerics( constant_op.constant(1.0) / constant_op.constant(0.0), "oops")) batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = ( - dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) - .make_initializable_iterator()) + dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer with self.cached_session() as sess: with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"): sess.run(init_op, feed_dict={batch_size: 14}) - def testMapAndBatchShapeMismatch(self): + @parameterized.named_parameters( + ("Normal", False), + ("NUMA", True), + ) + def testMapAndBatchShapeMismatch(self, numa_aware): """Test a dataset that maps a TF function across its input elements.""" def generator(): @@ -240,9 +311,13 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset_ops.Dataset.from_generator( generator, output_types=dtypes.int32) batch_size = 4 - iterator = ( - dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) - .make_initializable_iterator()) + dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer get_next = 
iterator.get_next() with self.cached_session() as sess: @@ -251,7 +326,11 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): "number of elements does not match"): sess.run(get_next) - def testMapAndBatchImplicitDispose(self): + @parameterized.named_parameters( + ("Normal", False), + ("NUMA", True), + ) + def testMapAndBatchImplicitDispose(self, numa_aware): # Tests whether a map and batch dataset will be cleaned up correctly when # the pipeline does not run it until exhaustion. # The pipeline is TensorSliceDataset -> RepeatDataset(1000) -> @@ -266,6 +345,10 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat( 1000).apply(batching.map_and_batch(_map_fn, batch_size=100)) dataset = dataset.prefetch(5) + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() @@ -274,26 +357,38 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): sess.run(get_next) @parameterized.named_parameters( - ("1", 0), - ("2", 5), - ("3", 10), - ("4", 90), - ("5", 95), - ("6", 99), + ("1", 0, False), + ("2", 5, False), + ("3", 10, False), + ("4", 90, False), + ("5", 95, False), + ("6", 99, False), + ("1NUMA", 0, True), + ("2NUMA", 5, True), + ("3NUMA", 10, True), + ("4NUMA", 90, True), + ("5NUMA", 95, True), + ("6NUMA", 99, True), ) - def testMapAndBatchOutOfRangeError(self, threshold): + def testMapAndBatchOutOfRangeError(self, threshold, numa_aware): def raising_py_fn(i): - if i >= threshold: + if i == threshold: raise StopIteration() + elif i > threshold: + raise RuntimeError("Alternate error; you shouldn't see me! 
(i: %s)" % i) else: return i - iterator = ( - dataset_ops.Dataset.range(100).apply( - batching.map_and_batch( - lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64), - batch_size=10)).make_one_shot_iterator()) + dataset = dataset_ops.Dataset.range(100).apply( + batching.map_and_batch( + lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64), + batch_size=10)) + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() with self.cached_session() as sess: @@ -307,25 +402,42 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): sess.run(get_next) @parameterized.named_parameters( - ("1", False, dtypes.bool), - ("2", -42, dtypes.int8), - ("3", -42, dtypes.int16), - ("4", -42, dtypes.int32), - ("5", -42, dtypes.int64), - ("6", 42, dtypes.uint8), - ("7", 42, dtypes.uint16), - ("8", 42.0, dtypes.float16), - ("9", 42.0, dtypes.float32), - ("10", 42.0, dtypes.float64), - ("11", b"hello", dtypes.string), + ("1", False, dtypes.bool, False), + ("2", -42, dtypes.int8, False), + ("3", -42, dtypes.int16, False), + ("4", -42, dtypes.int32, False), + ("5", -42, dtypes.int64, False), + ("6", 42, dtypes.uint8, False), + ("7", 42, dtypes.uint16, False), + ("8", 42.0, dtypes.float16, False), + ("9", 42.0, dtypes.float32, False), + ("10", 42.0, dtypes.float64, False), + ("11", b"hello", dtypes.string, False), + ("1NUMA", False, dtypes.bool, True), + ("2NUMA", -42, dtypes.int8, True), + ("3NUMA", -42, dtypes.int16, True), + ("4NUMA", -42, dtypes.int32, True), + ("5NUMA", -42, dtypes.int64, True), + ("6NUMA", 42, dtypes.uint8, True), + ("7NUMA", 42, dtypes.uint16, True), + ("8NUMA", 42.0, dtypes.float16, True), + ("9NUMA", 42.0, dtypes.float32, True), + ("10NUMA", 42.0, dtypes.float64, True), + ("11NUMA", b"hello", dtypes.string, True), ) - def testMapAndBatchTypes(self, element, dtype): + def 
testMapAndBatchTypes(self, element, dtype, numa_aware): + def gen(): yield element dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply( batching.map_and_batch(lambda x: x, batch_size=10)) + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + get_next = dataset.make_one_shot_iterator().get_next() with self.cached_session() as sess: @@ -363,6 +475,40 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase): sess.run(iterator.initializer, feed_dict={captured_t: 42}) self.assertAllEqual([42] * 10, sess.run(get_next)) + @parameterized.named_parameters( + ("Normal", False), + ("NUMA", True), + ) + def testMapAndBatchControlFlow(self, numa_aware): + + def map_fn(x): + previous_cond_v2_value = control_flow_ops.ENABLE_COND_V2 + control_flow_ops.ENABLE_COND_V2 = True + return_value = control_flow_ops.cond(x < 50, lambda: x + 1, lambda: x * x) + control_flow_ops.ENABLE_COND_V2 = previous_cond_v2_value + return return_value + + dataset = dataset_ops.Dataset.range(100).apply( + batching.map_and_batch(map_fn, batch_size=10)) + if numa_aware: + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + with self.cached_session() as sess: + for i in range(10): + print("Case %d" % i) + if i < 5: + self.assertAllEqual([i * 10 + j + 1 for j in range(10)], + sess.run(get_next)) + else: + self.assertAllEqual( + [((i * 10) + j) * ((i * 10) + j) for j in range(10)], + sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD index c92bb8b9bc..5a0a73fd83 100644 --- 
a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD @@ -161,6 +161,7 @@ py_test( "//tensorflow/python/data/kernel_tests:test_base", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", ], ) @@ -199,6 +200,7 @@ py_test( deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", + "//tensorflow/python/data/experimental/ops:batching", "//tensorflow/python/data/experimental/ops:optimization", "//tensorflow/python/data/kernel_tests:test_base", "//tensorflow/python/data/ops:dataset_ops", diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py index 82516356df..d38255a6ea 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import time +from absl.testing import parameterized import numpy as np from tensorflow.python.data.experimental.ops import batching @@ -29,7 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.platform import test -class ModelDatasetTest(test_base.DatasetTestBase): +class ModelDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): def testModelMap(self): k = 1024 * 1024 @@ -82,7 +83,11 @@ class ModelDatasetTest(test_base.DatasetTestBase): (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas), np.max(deltas))) - def testModelMapAndBatch(self): + @parameterized.named_parameters( + ("Default", False), + ("NUMA", True), + ) + def testModelMapAndBatch(self, numa_aware): batch_size = 16 k = 1024 * 1024 dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k), @@ -95,6 +100,8 @@ class 
ModelDatasetTest(test_base.DatasetTestBase): batch_size=batch_size)) options = dataset_ops.Options() options.experimental_autotune = True + if numa_aware: + options.experimental_numa_aware = True iterator = dataset.with_options(options).make_one_shot_iterator() get_next = iterator.get_next() diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py index 760cd8cc4e..2ef29796ab 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from tensorflow.python.data.experimental.ops import batching from tensorflow.python.data.experimental.ops import optimization from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops @@ -59,6 +60,21 @@ class OptimizeDatasetTest(test_base.DatasetTestBase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testNumaAwareRewrite(self): + dataset = dataset_ops.Dataset.range(10).apply( + optimization.assert_next(["NumaMapAndBatch"])).apply( + batching.map_and_batch(lambda x: x * x, 10)) + options = dataset_ops.Options() + options.experimental_numa_aware = True + dataset = dataset.with_options(options) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + + with self.cached_session() as sess: + self.assertAllEqual([x * x for x in range(10)], sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + def testOptimizationStatefulFunction(self): dataset = dataset_ops.Dataset.range(10).map( lambda _: random_ops.random_uniform([])).batch(10) diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD 
b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD index e556b65b7c..a97cff9fbb 100644 --- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD @@ -306,6 +306,21 @@ py_test( ], ) +py_test( + name = "numa_map_and_batch_dataset_serialization_test", + size = "medium", + srcs = ["numa_map_and_batch_dataset_serialization_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":dataset_serialization_test_base", + "//tensorflow/python:client_testlib", + "//tensorflow/python:math_ops", + "//tensorflow/python/data/experimental/ops:batching", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + py_test( name = "map_dataset_serialization_test", size = "medium", diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py new file mode 100644 index 0000000000..04aab329cd --- /dev/null +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py @@ -0,0 +1,95 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the MapAndBatchDataset serialization.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base +from tensorflow.python.data.experimental.ops import batching +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class MapAndBatchDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def testNumParallelBatches(self): + range_size = 11 + num_repeats = 2 + batch_size = 5 + total_outputs = range_size * num_repeats + num_outputs_drop_remainder = total_outputs // batch_size + num_outputs_keep_remainder = int(math.ceil(total_outputs / batch_size)) + num_parallel_batches = 2 + + def build_ds(range_start, drop_remainder=False): + + def _map_fn(x): + return math_ops.square(x) + + ds = dataset_ops.Dataset.range( + range_start, range_start + range_size).repeat(num_repeats).apply( + batching.map_and_batch( + map_func=_map_fn, + batch_size=batch_size, + num_parallel_batches=num_parallel_batches, + drop_remainder=drop_remainder)) + options = dataset_ops.Options() + options.experimental_numa_aware = True + return ds.with_options(options) + + self.run_core_tests(lambda: build_ds(10), lambda: build_ds(15), + num_outputs_keep_remainder) + self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True), + num_outputs_drop_remainder) + + def testNumParallelCalls(self): + range_size = 11 + num_repeats = 2 + batch_size = 5 + total_outputs = range_size * num_repeats + num_outputs_drop_remainder = total_outputs // batch_size + num_outputs_keep_remainder = int(math.ceil(total_outputs / batch_size)) + num_parallel_calls = 7 + + def build_ds(range_start, drop_remainder=False): + 
+ def _map_fn(x): + return math_ops.square(x) + + ds = dataset_ops.Dataset.range( + range_start, range_start + range_size).repeat(num_repeats).apply( + batching.map_and_batch( + map_func=_map_fn, + batch_size=batch_size, + num_parallel_calls=num_parallel_calls, + drop_remainder=drop_remainder)) + options = dataset_ops.Options() + options.experimental_numa_aware = True + return ds.with_options(options) + + self.run_core_tests(lambda: build_ds(10), lambda: build_ds(15), + num_outputs_keep_remainder) + self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True), + num_outputs_drop_remainder) + + +if __name__ == "__main__": + test.main() + diff --git a/tensorflow/python/data/experimental/ops/BUILD b/tensorflow/python/data/experimental/ops/BUILD index 915d399f1b..46a9552b61 100644 --- a/tensorflow/python/data/experimental/ops/BUILD +++ b/tensorflow/python/data/experimental/ops/BUILD @@ -122,6 +122,7 @@ py_library( "//tensorflow/python:array_ops", "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python:tensor_shape", diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index cf52f7529a..6195747671 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1410,6 +1410,8 @@ class Options(object): "Whether to eliminate no-op transformations."), ("experimental_shuffle_and_repeat_fusion", bool, "Whether to fuse shuffle and repeat transformations."), + ("experimental_numa_aware", bool, + "Whether to use NUMA-aware operations."), ]: def _make_getter(name): # pylint: disable=no-self-argument @@ -1458,6 +1460,9 @@ class Options(object): for exp_opt in experimental_optimizations: if getattr(self, "experimental_" + exp_opt): result.append(exp_opt) + + if getattr(self, "experimental_numa_aware"): + 
result.append("map_and_batch_numa_aware_replacement") return result def merge(self, options): @@ -1485,7 +1490,7 @@ class Options(object): "experimental_map_and_filter_fusion", "experimental_map_fusion", "experimental_map_parallelization", "experimental_map_vectorization", "experimental_noop_elimination", - "experimental_shuffle_and_repeat_fusion" + "experimental_shuffle_and_repeat_fusion", "experimental_numa_aware", ]: this = getattr(result, name) that = getattr(other, name) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt index d15dccc173..22256996d3 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt @@ -42,6 +42,10 @@ tf_class { name: "experimental_noop_elimination" mtype: "" } + member { + name: "experimental_numa_aware" + mtype: "" + } member { name: "experimental_shuffle_and_repeat_fusion" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt index d15dccc173..22256996d3 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt @@ -42,6 +42,10 @@ tf_class { name: "experimental_noop_elimination" mtype: "" } + member { + name: "experimental_numa_aware" + mtype: "" + } member { name: "experimental_shuffle_and_repeat_fusion" mtype: "" -- GitLab From 8c2a52b26f21167ed0fcec7859850e38d0c216f9 Mon Sep 17 00:00:00 2001 From: Pavel Sountsov Date: Tue, 9 Oct 2018 11:56:25 -0700 Subject: [PATCH 132/411] Silence tf.distributions deprecation messages caused by internal global function calls. E.g. register_kl calls would trigger such warnings. This spam was exacerbated by the fact that it happens before logging is initialized, so it is dumped prominently to STDERR. 
Worse yet it also happened no matter whether the user imported any symbols from tf.distributions or not as the relevant code is executed when you import TensorFlow. PiperOrigin-RevId: 216396036 --- tensorflow/contrib/distributions/__init__.py | 128 +++++++++--------- .../python/ops/distributions/distributions.py | 35 ++--- 2 files changed, 85 insertions(+), 78 deletions(-) diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 5cec93c4df..343eae3440 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -18,69 +18,73 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member +from tensorflow.python.util import deprecation -from tensorflow.contrib.distributions.python.ops import bijectors -from tensorflow.contrib.distributions.python.ops.autoregressive import * -from tensorflow.contrib.distributions.python.ops.batch_reshape import * -from tensorflow.contrib.distributions.python.ops.binomial import * -from tensorflow.contrib.distributions.python.ops.cauchy import * -from tensorflow.contrib.distributions.python.ops.chi2 import * -from tensorflow.contrib.distributions.python.ops.conditional_distribution import * -from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import * -from tensorflow.contrib.distributions.python.ops.deterministic import * -from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular -from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular_inverse -from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform -from tensorflow.contrib.distributions.python.ops.distribution_util import reduce_weighted_logsumexp -from tensorflow.contrib.distributions.python.ops.distribution_util 
import softplus_inverse -from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag -from tensorflow.contrib.distributions.python.ops.estimator import * -from tensorflow.contrib.distributions.python.ops.geometric import * -from tensorflow.contrib.distributions.python.ops.half_normal import * -from tensorflow.contrib.distributions.python.ops.independent import * -from tensorflow.contrib.distributions.python.ops.inverse_gamma import * -from tensorflow.contrib.distributions.python.ops.kumaraswamy import * -from tensorflow.contrib.distributions.python.ops.logistic import * -from tensorflow.contrib.distributions.python.ops.mixture import * -from tensorflow.contrib.distributions.python.ops.mixture_same_family import * -from tensorflow.contrib.distributions.python.ops.moving_stats import * -from tensorflow.contrib.distributions.python.ops.mvn_diag import * -from tensorflow.contrib.distributions.python.ops.mvn_diag_plus_low_rank import * -from tensorflow.contrib.distributions.python.ops.mvn_full_covariance import * -from tensorflow.contrib.distributions.python.ops.mvn_tril import * -from tensorflow.contrib.distributions.python.ops.negative_binomial import * -from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import * -from tensorflow.contrib.distributions.python.ops.onehot_categorical import * -from tensorflow.contrib.distributions.python.ops.poisson import * -from tensorflow.contrib.distributions.python.ops.poisson_lognormal import * -from tensorflow.contrib.distributions.python.ops.quantized_distribution import * -from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import * -from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import * -from tensorflow.contrib.distributions.python.ops.sample_stats import * -from tensorflow.contrib.distributions.python.ops.seed_stream import * -from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import * -from 
tensorflow.contrib.distributions.python.ops.test_util import * -from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import * -from tensorflow.contrib.distributions.python.ops.vector_exponential_diag import * -from tensorflow.contrib.distributions.python.ops.vector_laplace_diag import * -from tensorflow.contrib.distributions.python.ops.vector_sinh_arcsinh_diag import * -from tensorflow.contrib.distributions.python.ops.wishart import * -from tensorflow.python.ops.distributions.bernoulli import * -from tensorflow.python.ops.distributions.beta import * -from tensorflow.python.ops.distributions.categorical import * -from tensorflow.python.ops.distributions.dirichlet import * -from tensorflow.python.ops.distributions.dirichlet_multinomial import * -from tensorflow.python.ops.distributions.distribution import * -from tensorflow.python.ops.distributions.exponential import * -from tensorflow.python.ops.distributions.gamma import * -from tensorflow.python.ops.distributions.kullback_leibler import * -from tensorflow.python.ops.distributions.laplace import * -from tensorflow.python.ops.distributions.multinomial import * -from tensorflow.python.ops.distributions.normal import * -from tensorflow.python.ops.distributions.student_t import * -from tensorflow.python.ops.distributions.transformed_distribution import * -from tensorflow.python.ops.distributions.uniform import * + +# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member,g-import-not-at-top + +with deprecation.silence(): + from tensorflow.contrib.distributions.python.ops import bijectors + from tensorflow.contrib.distributions.python.ops.autoregressive import * + from tensorflow.contrib.distributions.python.ops.batch_reshape import * + from tensorflow.contrib.distributions.python.ops.binomial import * + from tensorflow.contrib.distributions.python.ops.cauchy import * + from tensorflow.contrib.distributions.python.ops.chi2 import * + from 
tensorflow.contrib.distributions.python.ops.conditional_distribution import * + from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import * + from tensorflow.contrib.distributions.python.ops.deterministic import * + from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular + from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular_inverse + from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform + from tensorflow.contrib.distributions.python.ops.distribution_util import reduce_weighted_logsumexp + from tensorflow.contrib.distributions.python.ops.distribution_util import softplus_inverse + from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag + from tensorflow.contrib.distributions.python.ops.estimator import * + from tensorflow.contrib.distributions.python.ops.geometric import * + from tensorflow.contrib.distributions.python.ops.half_normal import * + from tensorflow.contrib.distributions.python.ops.independent import * + from tensorflow.contrib.distributions.python.ops.inverse_gamma import * + from tensorflow.contrib.distributions.python.ops.kumaraswamy import * + from tensorflow.contrib.distributions.python.ops.logistic import * + from tensorflow.contrib.distributions.python.ops.mixture import * + from tensorflow.contrib.distributions.python.ops.mixture_same_family import * + from tensorflow.contrib.distributions.python.ops.moving_stats import * + from tensorflow.contrib.distributions.python.ops.mvn_diag import * + from tensorflow.contrib.distributions.python.ops.mvn_diag_plus_low_rank import * + from tensorflow.contrib.distributions.python.ops.mvn_full_covariance import * + from tensorflow.contrib.distributions.python.ops.mvn_tril import * + from tensorflow.contrib.distributions.python.ops.negative_binomial import * + from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors 
import * + from tensorflow.contrib.distributions.python.ops.onehot_categorical import * + from tensorflow.contrib.distributions.python.ops.poisson import * + from tensorflow.contrib.distributions.python.ops.poisson_lognormal import * + from tensorflow.contrib.distributions.python.ops.quantized_distribution import * + from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import * + from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import * + from tensorflow.contrib.distributions.python.ops.sample_stats import * + from tensorflow.contrib.distributions.python.ops.seed_stream import * + from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import * + from tensorflow.contrib.distributions.python.ops.test_util import * + from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import * + from tensorflow.contrib.distributions.python.ops.vector_exponential_diag import * + from tensorflow.contrib.distributions.python.ops.vector_laplace_diag import * + from tensorflow.contrib.distributions.python.ops.vector_sinh_arcsinh_diag import * + from tensorflow.contrib.distributions.python.ops.wishart import * + from tensorflow.python.ops.distributions.bernoulli import * + from tensorflow.python.ops.distributions.beta import * + from tensorflow.python.ops.distributions.categorical import * + from tensorflow.python.ops.distributions.dirichlet import * + from tensorflow.python.ops.distributions.dirichlet_multinomial import * + from tensorflow.python.ops.distributions.distribution import * + from tensorflow.python.ops.distributions.exponential import * + from tensorflow.python.ops.distributions.gamma import * + from tensorflow.python.ops.distributions.kullback_leibler import * + from tensorflow.python.ops.distributions.laplace import * + from tensorflow.python.ops.distributions.multinomial import * + from tensorflow.python.ops.distributions.normal import * + from tensorflow.python.ops.distributions.student_t import * + from 
tensorflow.python.ops.distributions.transformed_distribution import * + from tensorflow.python.ops.distributions.uniform import * # pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member diff --git a/tensorflow/python/ops/distributions/distributions.py b/tensorflow/python/ops/distributions/distributions.py index 59ed455e43..b18caa5b2e 100644 --- a/tensorflow/python/ops/distributions/distributions.py +++ b/tensorflow/python/ops/distributions/distributions.py @@ -17,21 +17,24 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.util import deprecation -# pylint: disable=wildcard-import,unused-import -from tensorflow.python.ops.distributions.bernoulli import Bernoulli -from tensorflow.python.ops.distributions.beta import Beta -from tensorflow.python.ops.distributions.categorical import Categorical -from tensorflow.python.ops.distributions.dirichlet import Dirichlet -from tensorflow.python.ops.distributions.dirichlet_multinomial import DirichletMultinomial -from tensorflow.python.ops.distributions.distribution import * -from tensorflow.python.ops.distributions.exponential import Exponential -from tensorflow.python.ops.distributions.gamma import Gamma -from tensorflow.python.ops.distributions.kullback_leibler import * -from tensorflow.python.ops.distributions.laplace import Laplace -from tensorflow.python.ops.distributions.multinomial import Multinomial -from tensorflow.python.ops.distributions.normal import Normal -from tensorflow.python.ops.distributions.student_t import StudentT -from tensorflow.python.ops.distributions.uniform import Uniform -# pylint: enable=wildcard-import,unused-import +# pylint: disable=wildcard-import,unused-import,g-import-not-at-top +with deprecation.silence(): + from tensorflow.python.ops.distributions.bernoulli import Bernoulli + from tensorflow.python.ops.distributions.beta import Beta + from 
tensorflow.python.ops.distributions.categorical import Categorical + from tensorflow.python.ops.distributions.dirichlet import Dirichlet + from tensorflow.python.ops.distributions.dirichlet_multinomial import DirichletMultinomial + from tensorflow.python.ops.distributions.distribution import * + from tensorflow.python.ops.distributions.exponential import Exponential + from tensorflow.python.ops.distributions.gamma import Gamma + from tensorflow.python.ops.distributions.kullback_leibler import * + from tensorflow.python.ops.distributions.laplace import Laplace + from tensorflow.python.ops.distributions.multinomial import Multinomial + from tensorflow.python.ops.distributions.normal import Normal + from tensorflow.python.ops.distributions.student_t import StudentT + from tensorflow.python.ops.distributions.uniform import Uniform +# pylint: enable=wildcard-import,unused-import +del deprecation -- GitLab From 0c6baae5af46bb22ea52db724e2194845d3bbf8c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 12:24:05 -0700 Subject: [PATCH 133/411] Add RaggedTensors to tf.core. Moving the RaggedGather op kernel. 
PiperOrigin-RevId: 216400726 --- tensorflow/core/BUILD | 15 + .../base_api/api_def_RaggedGather.pbtxt | 81 +++++ tensorflow/core/kernels/BUILD | 31 ++ tensorflow/core/kernels/ragged_gather_op.cc | 292 ++++++++++++++++++ .../core/kernels/ragged_gather_op_test.cc | 281 +++++++++++++++++ tensorflow/core/ops/ragged_array_ops.cc | 85 +++++ 6 files changed, 785 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt create mode 100644 tensorflow/core/kernels/ragged_gather_op.cc create mode 100644 tensorflow/core/kernels/ragged_gather_op_test.cc create mode 100644 tensorflow/core/ops/ragged_array_ops.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index acea8e2217..9e7806342a 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1154,6 +1154,19 @@ tf_gen_op_libs( ], ) +cc_library( + name = "ragged_ops", + deps = [ + ":ragged_array_ops_op_lib", + ], +) + +tf_gen_op_libs( + op_lib_names = [ + "ragged_array_ops", + ], +) + cc_library( name = "ops", visibility = ["//visibility:public"], @@ -1187,6 +1200,7 @@ cc_library( ":nn_ops_op_lib", ":no_op_op_lib", ":parsing_ops_op_lib", + ":ragged_ops", ":random_ops_op_lib", ":remote_fused_graph_ops_op_lib", ":resource_variable_ops_op_lib", @@ -1340,6 +1354,7 @@ cc_library( "//tensorflow/core/kernels:parameterized_truncated_normal_op", "//tensorflow/core/kernels:parsing", "//tensorflow/core/kernels:partitioned_function_ops", + "//tensorflow/core/kernels:ragged_ops", "//tensorflow/core/kernels:random_ops", "//tensorflow/core/kernels:random_poisson_op", "//tensorflow/core/kernels:remote_fused_graph_ops", diff --git a/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt b/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt new file mode 100644 index 0000000000..240c987dda --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt @@ -0,0 +1,81 @@ +op { + graph_op_name: "RaggedGather" + visibility: HIDDEN + in_arg { + name: 
"params_nested_splits" + description: < +#include +#include +#include + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/util/util.h" + +namespace tensorflow { + +namespace { + +// For each slice in `(start, limit)` in `value_slices`, append +// `params_dense_values_in[start:limit] to `values_out`. `value_size` indicates +// the number of scalars contained in each value params_dense_values_in[i]. +template +void WriteValueSlices(const Tensor& params_dense_values_in, + const std::vector>& value_slices, + int64 value_size, Tensor* values_out) { + const auto& params_dense_values = + params_dense_values_in.flat_outer_dims(); + auto values = values_out->flat_outer_dims(); + int out_pos = 0; + for (const auto& slice : value_slices) { + for (int i = slice.first; i < slice.second; ++i) { + for (int j = 0; j < value_size; ++j) { + values(out_pos, j) = params_dense_values(i, j); + } + ++out_pos; + } + } +} + +} // namespace + +template +class RaggedGatherOpBase : public OpKernel { + public: + using OpKernel::OpKernel; + + void Compute(OpKernelContext* context) override { + // Get the input Tensors. + OpInputList params_nested_splits_in; + OP_REQUIRES_OK(context, context->input_list("params_nested_splits", + ¶ms_nested_splits_in)); + const Tensor& params_dense_values_in = + context->input(params_nested_splits_in.size()); + const Tensor& indices_in = + context->input(params_nested_splits_in.size() + 1); + + DCHECK_GT(params_nested_splits_in.size(), 0); // Enforced by REGISTER_OP. 
+ int64 num_params = params_nested_splits_in[0].dim_size(0) - 1; + OP_REQUIRES_OK(context, ValidateIndices(indices_in, num_params)); + + OP_REQUIRES(context, params_dense_values_in.dims() > 0, + errors::InvalidArgument("params.rank must be nonzero")); + int64 num_params_dense_values = params_dense_values_in.dim_size(0); + + // Calculate the `splits`, and store the value slices that we need to + // copy in `value_slices`. + std::vector> value_slices; + int64 num_values = 0; + std::vector> out_splits; + OP_REQUIRES_OK(context, MakeSplits(indices_in, params_nested_splits_in, + num_params_dense_values, &out_splits, + &value_slices, &num_values)); + + // Write the output tensors. + OP_REQUIRES_OK(context, WriteSplits(out_splits, context)); + OP_REQUIRES_OK(context, + WriteValues(params_dense_values_in, value_slices, + out_splits.size(), num_values, context)); + } + + private: + // Check if any indices are out-of-bounds. + ::tensorflow::Status ValidateIndices(const Tensor& indices_in, + int64 num_params) { + const auto& indices = indices_in.flat(); + for (int64 i = 0; i < indices.size(); ++i) { + int64 index = indices(i); + if (index < 0 || index >= num_params) { + return errors::InvalidArgument( + "indices", SliceDebugString(indices_in.shape(), i), " = ", index, + " is not in [0, ", num_params, ")"); + } + } + return ::tensorflow::Status::OK(); + } + + // Construct the `splits` output tensors, encoded using a nested vector. + // Also find the slices of values that need to be copied, and store them + // in `value_slices`. The total number of values that will be copied (which + // we need for allocating the output values tensor) is stored in `num_values`. 
+ ::tensorflow::Status MakeSplits( + const Tensor& indices_in, const OpInputList& params_nested_splits_in, + int64 num_params_dense_values, + std::vector>* out_splits, + std::vector>* value_slices, int64* num_values) { + *num_values = 0; + value_slices->clear(); + + int num_splits = indices_in.dims() - 1 + params_nested_splits_in.size(); + out_splits->assign(num_splits, {0}); + + // Get Eigen tensors. + const auto& indices = indices_in.flat(); + std::vector::ConstFlat> params_nested_splits; + params_nested_splits.reserve(params_nested_splits_in.size()); + for (const auto& splits_in : params_nested_splits_in) { + params_nested_splits.push_back(splits_in.flat()); + } + + TF_RETURN_IF_ERROR( + ValidateSplits(params_nested_splits, num_params_dense_values)); + + // Add `splits` that come from all but the last dimension of the dense + // Tensor `indices`. In particular, for each dimension D, we add a + // splits tensor whose values are: + // range(splits.shape[D]*splits.shape[D+1] + 1, step=splits.shape[D+1]) + // E.g., if indices.shape=[5, 3] then we will add a splits tensor + // [0, 3, 6, 9, 12, 15], since the outermost dimension has 5 elements, + // each of which contains 3 values. + for (int dim = 0; dim < indices_in.dims() - 1; ++dim) { + int stride = indices_in.dim_size(dim + 1); + int index = stride; + for (int i = 0; i < indices_in.dim_size(dim); ++i) { + out_splits->at(dim).push_back(index); + index += stride; + } + } + + // Add `splits` that come from `params_nested_splits`. Starting with the + // outermost ragged dimension (i.e., the first `splits` tensor), we work + // our way in, finding the range of values that should be copied. As we + // go, we update the output `splits` for each dimension with the appropriate + // values. In particular, the *lengths* of the slices from `param_splits` + // should be copied to generate corresponding slice lengths in the output + // splits. 
E.g., if we are copying a ragged row with length 4, then we + // should add a new split point to out_splits that is 4 greater than the + // previous split point in out_splits. + for (int i = 0; i < indices.size(); ++i) { + int start = indices(i); + int limit = indices(i) + 1; + + // Copy splits. + for (int dim = 0; dim < params_nested_splits.size(); ++dim) { + const auto& splits = params_nested_splits[dim]; + int out_dim = dim + indices_in.dims() - 1; + if (out_dim >= 0) { + int64 delta = out_splits->at(out_dim).back() - splits(start); + for (int j = start; j < limit; ++j) { + out_splits->at(out_dim).push_back(splits(j + 1) + delta); + } + } + start = splits(start); + limit = splits(limit); + } + if (limit != start) { + value_slices->emplace_back(start, limit); + *num_values += limit - start; + } + } + return ::tensorflow::Status::OK(); + } + + ::tensorflow::Status ValidateSplits( + const std::vector::ConstFlat>& params_nested_splits, + int64 num_params_dense_values) { + // Validate + for (int dim = 0; dim < params_nested_splits.size(); ++dim) { + const auto& splits = params_nested_splits[dim]; + int64 last_split = (dim == params_nested_splits.size() - 1) + ? 
num_params_dense_values + : params_nested_splits[dim + 1].size(); + if (splits.size() == 0) { + return errors::InvalidArgument("Ragged splits may not be empty"); + } + if (splits(0) < 0) { + return errors::InvalidArgument("Ragged splits must be non-negative"); + } + if (splits(splits.size() - 1) > last_split) { + return errors::InvalidArgument( + "Ragged splits must not point past values"); + } + for (int i = 1; i < splits.size(); ++i) { + if (splits(i - 1) > splits(i)) { + return errors::InvalidArgument("Ragged splits must be sorted"); + } + } + } + return ::tensorflow::Status::OK(); + } + + ::tensorflow::Status WriteSplits( + const std::vector>& out_splits, + OpKernelContext* context) { + OpOutputList splits_out; + TF_RETURN_IF_ERROR( + context->output_list("output_nested_splits", &splits_out)); + for (int i = 0; i < out_splits.size(); ++i) { + Tensor* splits; + int64 num_splits = out_splits[i].size(); + TF_RETURN_IF_ERROR( + splits_out.allocate(i, TensorShape({num_splits}), &splits)); + auto splits_flat = splits->flat(); + std::copy_n(out_splits[i].data(), out_splits[i].size(), + splits_flat.data()); + } + return ::tensorflow::Status::OK(); + } + + ::tensorflow::Status WriteValues( + const Tensor& params_dense_values_in, + const std::vector>& value_slices, + int values_index, int64 num_values, OpKernelContext* context) const { + Tensor* values_out = nullptr; + TensorShape values_shape = params_dense_values_in.shape(); + values_shape.set_dim(0, num_values); + TF_RETURN_IF_ERROR( + context->allocate_output(values_index, values_shape, &values_out)); + int64 value_size = params_dense_values_in.NumElements() / + params_dense_values_in.dim_size(0); + CallWriteValueSlices(params_dense_values_in, value_slices, value_size, + values_out); + return ::tensorflow::Status::OK(); + } + + protected: + // Call WriteValueSlices() using the appropriate VALUE_TYPE template + // parameter. This pattern is used to reduce binary size. 
In particular, + // this allows us to have two instantiations of this class (one for each + // index type), rather than 14 (one for each index type and value type), + // which cuts the binary size of this op from ~300k to <90k. + virtual void CallWriteValueSlices( + const Tensor& params_dense_values_in, + const std::vector>& value_slices, + int64 value_size, Tensor* values_out) const = 0; +}; + +template +class RaggedGatherOp : public RaggedGatherOpBase { + public: + using RaggedGatherOpBase::RaggedGatherOpBase; + + private: + void CallWriteValueSlices( + const Tensor& params_dense_values_in, + const std::vector>& value_slices, + int64 value_size, Tensor* values_out) const override { + WriteValueSlices(params_dense_values_in, value_slices, + value_size, values_out); + } +}; + +#define REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(index_type, value_type) \ + REGISTER_KERNEL_BUILDER(Name("RaggedGather") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("Tindices") \ + .TypeConstraint("Tvalues"), \ + RaggedGatherOp); +#define REGISTER_CPU_KERNEL(value_type) \ + REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(int32, value_type) \ + REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(int64, value_type) +TF_CALL_POD_TYPES(REGISTER_CPU_KERNEL); +TF_CALL_string(REGISTER_CPU_KERNEL); +TF_CALL_QUANTIZED_TYPES(REGISTER_CPU_KERNEL); +TF_CALL_quint16(REGISTER_CPU_KERNEL); +TF_CALL_qint16(REGISTER_CPU_KERNEL); +TF_CALL_uint32(REGISTER_CPU_KERNEL); +TF_CALL_uint64(REGISTER_CPU_KERNEL); +#undef REGISTER_CPU_KERNEL +#undef REGISTER_CPU_KERNEL_WITH_INDEX_TYPE + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/ragged_gather_op_test.cc b/tensorflow/core/kernels/ragged_gather_op_test.cc new file mode 100644 index 0000000000..47be788151 --- /dev/null +++ b/tensorflow/core/kernels/ragged_gather_op_test.cc @@ -0,0 +1,281 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/shape_inference_testutil.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +class RaggedGatherOpTest : public ::tensorflow::OpsTestBase { + protected: + // Builds the tensorflow test graph for RaggedGather. 
+ template + void BuildRaggedGatherGraph( + const TensorShape& indices_shape, const std::vector& indices, + const std::vector>& params_nested_splits, + const TensorShape& params_dense_values_shape, + const gtl::ArraySlice params_dense_values) { + const auto& value_dtype = DataTypeToEnum::v(); + const auto& index_dtype = DataTypeToEnum::v(); + int64 PARAMS_RAGGED_RANK = params_nested_splits.size(); + int64 num_splits = PARAMS_RAGGED_RANK + indices_shape.dims() - 1; + TF_ASSERT_OK( + NodeDefBuilder("tested_op", "RaggedGather") + .Input(FakeInput(PARAMS_RAGGED_RANK)) // params_nested_splits + .Input(FakeInput(value_dtype)) // params_dense_values + .Input(FakeInput(index_dtype)) // indices + .Attr("PARAMS_RAGGED_RANK", PARAMS_RAGGED_RANK) + .Attr("OUTPUT_RAGGED_RANK", num_splits) + .Attr("Tvalues", value_dtype) + .Attr("Tindices", index_dtype) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + for (const auto& splits : params_nested_splits) { + int64 splits_size = splits.size(); + AddInputFromArray(TensorShape({splits_size}), splits); + } + AddInputFromArray(params_dense_values_shape, + params_dense_values); + AddInputFromArray(indices_shape, indices); + } +}; + +TEST_F(RaggedGatherOpTest, RaggedGather) { + // indices = [2, 1, 0, 3] + // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] + // params.shape = [4, None] + BuildRaggedGatherGraph( + TensorShape({4}), // indices.shape + {2, 1, 0, 3}, // indices + {{0, 3, 3, 7, 9}}, // params_nested_splits + TensorShape({9}), // params_dense_values.shape + {.1, .2, .3, .4, .5, .6, .7, .8, .9} // params_dense_values + ); + + TF_ASSERT_OK(RunOpKernel()); + + // Expected: [[.4, .5, .6, .7], [.1, .2, .3], [], [.8, .9]] + test::ExpectTensorEqual(*GetOutput(0), + test::AsTensor({0, 4, 4, 7, 9})); + test::ExpectTensorNear( + *GetOutput(1), + test::AsTensor({.4, .5, .6, .7, .1, .2, .3, .8, .9}), 0.1); +} + +TEST_F(RaggedGatherOpTest, RaggedGather_3DParams) { + // indices = [2, 1, 0, 2, 3] + // params = [[[]], [[.1, 2], [.3]], 
[], [[.4, .5], [.6, .7, .8]], [[.9]]] + // params.shape = [5, None, None] + BuildRaggedGatherGraph( + TensorShape({5}), // indices.shape + {2, 1, 0, 2, 3}, // indices + {{0, 1, 3, 3, 5, 6}, {0, 0, 2, 3, 5, 8, 9}}, // params_nested_splits + TensorShape({9}), // params_dense_values.shape + {.1, .2, .3, .4, .5, .6, .7, .8, .9} // params_dense_values + ); + + TF_ASSERT_OK(RunOpKernel()); + + // Expected: [[], [[.1, 2], [.3]], [[]], [], [[.4, .5], [.6, .7, .8]]] + test::ExpectTensorEqual(*GetOutput(0), + test::AsTensor({0, 0, 2, 3, 3, 5})); + test::ExpectTensorEqual(*GetOutput(1), + test::AsTensor({0, 2, 3, 3, 5, 8})); + test::ExpectTensorNear( + *GetOutput(2), test::AsTensor({.1, .2, .3, .4, .5, .6, .7, .8}), + 0.1); +} + +TEST_F(RaggedGatherOpTest, RaggedGather_4DParams) { + // indices = [2, 1, 0, 2] + // params = [[[]], [[[1, 2], [3, 4], [5, 6]], [[7, 8]]], []] + // params.shape = [4, None, None, 2] + BuildRaggedGatherGraph( + TensorShape({4}), // indices.shape + {2, 1, 0, 2}, // indices + {{0, 1, 3, 3}, {0, 0, 3, 4}}, // params_nested_splits + TensorShape({4, 2}), // params_dense_values.shape + {1, 2, 3, 4, 5, 6, 7, 8} // params_dense_values + ); + + TF_ASSERT_OK(RunOpKernel()); + + // Expected: [[], + // [[[1, 2], [3, 4], [5, 6]], [[7, 8]]], + // [[]], + // []] + test::ExpectTensorEqual(*GetOutput(0), + test::AsTensor({0, 0, 2, 3, 3})); + test::ExpectTensorEqual(*GetOutput(1), + test::AsTensor({0, 3, 4, 4})); + test::ExpectTensorEqual( + *GetOutput(2), + test::AsTensor({1, 2, 3, 4, 5, 6, 7, 8}, TensorShape({4, 2}))); +} + +TEST_F(RaggedGatherOpTest, RaggedGather_2DIndices) { + // indices = [[2, 1], [0, 3]] + // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] + BuildRaggedGatherGraph( + TensorShape({2, 2}), // indices.shape + {2, 1, 0, 3}, // indices + {{0, 3, 3, 7, 9}}, // params_nested_splits + TensorShape({9}), // params_dense_values.shape + {.1, .2, .3, .4, .5, .6, .7, .8, .9} // params_dense_values + ); + + TF_ASSERT_OK(RunOpKernel()); + + // Expected: 
[ [ [.4, .5, .6, .7], [.1, .2, .3] ], + // [ [], [.8, .9] ] ] + test::ExpectTensorEqual(*GetOutput(0), + test::AsTensor({0, 2, 4})); + test::ExpectTensorEqual(*GetOutput(1), + test::AsTensor({0, 4, 4, 7, 9})); + test::ExpectTensorNear( + *GetOutput(2), + test::AsTensor({.4, .5, .6, .7, .1, .2, .3, .8, .9}), 0.1); +} + +TEST_F(RaggedGatherOpTest, RaggedGather_ScalarIndices) { + // indices = 2 + // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] + BuildRaggedGatherGraph( + TensorShape({}), // indices.shape + {2}, // indices + {{0, 3, 3, 7, 9}}, // params_nested_splits + TensorShape({9}), // params_dense_values.shape + {.1, .2, .3, .4, .5, .6, .7, .8, .9} // params_dense_values + ); + TF_ASSERT_OK(RunOpKernel()); + + // Expected: [.4, .5, .6, .7] + test::ExpectTensorNear(*GetOutput(0), + test::AsTensor({.4, .5, .6, .7}), 0.1); +} + +TEST_F(RaggedGatherOpTest, RaggedGather_OutOfBounds) { + // indices = [2, 10] + // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] + BuildRaggedGatherGraph( + TensorShape({2}), // indices.shape + {2, 10}, // indices + {{0, 3, 3, 7, 9}}, // params_nested_splits + TensorShape({9}), // params_dense_values.shape + {.1, .2, .3, .4, .5, .6, .7, .8, .9} // params_dense_values + ); + EXPECT_EQ("indices[1] = 10 is not in [0, 4)", RunOpKernel().error_message()); +} + +TEST_F(RaggedGatherOpTest, InvalidSplitsNotSorted) { + BuildRaggedGatherGraph( + TensorShape({2}), // indices.shape + {0, 2}, // indices + {{0, 3, 5, 2, 9}}, // params_nested_splits + TensorShape({9}), // params_dense_values.shape + {.1, .2, .3, .4, .5, .6, .7, .8, .9} // params_dense_values + ); + EXPECT_EQ("Ragged splits must be sorted", RunOpKernel().error_message()); +} + +TEST_F(RaggedGatherOpTest, InvalidSplitsNegative) { + BuildRaggedGatherGraph( + TensorShape({2}), // indices.shape + {0, 2}, // indices + {{-1, 3, 2, 7, 9}}, // params_nested_splits + TensorShape({9}), // params_dense_values.shape + {.1, .2, .3, .4, .5, .6, .7, .8, .9} // params_dense_values + ); 
+ EXPECT_EQ("Ragged splits must be non-negative", + RunOpKernel().error_message()); +} + +TEST_F(RaggedGatherOpTest, InvalidSplitsEmpty) { + BuildRaggedGatherGraph( + TensorShape({0}), // indices.shape + {}, // indices + {{}}, // params_nested_splits + TensorShape({0}), // params_dense_values.shape + {} // params_dense_values + ); + EXPECT_EQ("Ragged splits may not be empty", RunOpKernel().error_message()); +} + +TEST_F(RaggedGatherOpTest, InvalidSplitsTooBig) { + BuildRaggedGatherGraph( + TensorShape({2}), // indices.shape + {0, 2}, // indices + {{0, 20, 40, 80, 100}}, // params_nested_splits + TensorShape({9}), // params_dense_values.shape + {.1, .2, .3, .4, .5, .6, .7, .8, .9} // params_dense_values + ); + EXPECT_EQ("Ragged splits must not point past values", + RunOpKernel().error_message()); +} + +TEST_F(RaggedGatherOpTest, BadValuesShape) { + BuildRaggedGatherGraph( + TensorShape({0}), // indices.shape + {}, // indices + {{0}}, // params_nested_splits + TensorShape({}), // params_dense_values.shape + {.1} // params_dense_values + ); + EXPECT_EQ("params.rank must be nonzero", RunOpKernel().error_message()); +} + +TEST_F(RaggedGatherOpTest, ShapeFn) { + // RaggedGather(param_splits+, param_values, indices) -> [splits+, values] + ShapeInferenceTestOp op("RaggedGather"); + + (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1); + (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(1); + INFER_OK(op, "?;?;?", "[?];?"); + INFER_OK(op, "[?];[?];[?]", "[?];[?]"); + INFER_OK(op, "[?];[?,?,?];[?]", "[?];[?,d1_1,d1_2]"); + INFER_OK(op, "[5];[10];[15]", "[?];[?]"); + INFER_OK(op, "[5];[10,2];[15]", "[?];[?,d1_1]"); + INFER_ERROR("Shape must be rank 1 but is rank 0", op, "[5];[];[]"); + INFER_ERROR("Shape must be rank 1 but is rank 2", op, "[1,2];[];[5]"); + + (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(2); + (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(2); + INFER_OK(op, "?;?;?;?", "[?];[?];?"); + INFER_OK(op, "[?];[?];[?];[?]", 
"[?];[?];[?]"); + INFER_OK(op, "[?];[?];[?,?,?];[?]", "[?];[?];[?,d2_1,d2_2]"); + INFER_OK(op, "[5];[10];[15];[20]", "[?];[?];[?]"); + + (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1); + (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(2); + INFER_OK(op, "?;?;?", "[?];[?];?"); + INFER_OK(op, "[?];[?];[?,?]", "[?];[?];[?]"); + INFER_OK(op, "[?];[?,?,?];[?,?]", "[?];[?];[?,d1_1,d1_2]"); + INFER_OK(op, "[15];[20];[5,10]", "[?];[?];[?]"); + INFER_OK(op, "[15];[20,2];[5,10]", "[?];[?];[?,d1_1]"); + + (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1); + (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(0); + INFER_OK(op, "[?];[?];[]", "[?]"); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/ops/ragged_array_ops.cc b/tensorflow/core/ops/ragged_array_ops.cc new file mode 100644 index 0000000000..4642579939 --- /dev/null +++ b/tensorflow/core/ops/ragged_array_ops.cc @@ -0,0 +1,85 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +using shape_inference::DimensionHandle; +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +Status RaggedGatherShapeFn(InferenceContext* c); + +//============================================================================== +// Registered Ops +//============================================================================== + +REGISTER_OP("RaggedGather") + .Input("params_nested_splits: PARAMS_RAGGED_RANK * int64") + .Input("params_dense_values: Tvalues") + .Input("indices: Tindices") + .Output("output_nested_splits: OUTPUT_RAGGED_RANK * int64") + .Output("output_dense_values: Tvalues") + .Attr("Tvalues: type") + .Attr("Tindices: {int32, int64}") + .Attr("PARAMS_RAGGED_RANK: int >= 1") + .Attr("OUTPUT_RAGGED_RANK: int >= 0") + .SetShapeFn(RaggedGatherShapeFn); + +//============================================================================== +// Shape Functions +//============================================================================== + +Status RaggedGatherShapeFn(InferenceContext* c) { + int num_splits; + int64 PARAMS_RAGGED_RANK; + TF_RETURN_IF_ERROR( + c->GetAttr("PARAMS_RAGGED_RANK", &PARAMS_RAGGED_RANK)); + TF_RETURN_IF_ERROR(c->GetAttr("OUTPUT_RAGGED_RANK", &num_splits)); + + // Check rank of `indices`. + ShapeHandle indices = c->input(PARAMS_RAGGED_RANK + 1); + TF_RETURN_IF_ERROR( + c->WithRank(indices, num_splits - PARAMS_RAGGED_RANK + 1, &indices)); + + // Check that all params_nested_splits have rank 1. + for (int64 i = 0; i < PARAMS_RAGGED_RANK; ++i) { + ShapeHandle splits = c->input(i); + TF_RETURN_IF_ERROR(c->WithRank(splits, 1, &splits)); + } + + // Check that `params_dense_values` has rank>=1. 
+ ShapeHandle params_dense_values = c->input(PARAMS_RAGGED_RANK); + TF_RETURN_IF_ERROR( + c->WithRankAtLeast(params_dense_values, 1, ¶ms_dense_values)); + + // Set the rank for the `splits` outputs. + for (int i = 0; i < num_splits; ++i) { + c->set_output(i, c->UnknownShapeOfRank(1)); + } + + // Calculate the `values` shape. + ShapeHandle value = c->UnknownShape(); + ShapeHandle values = c->UnknownShape(); + TF_RETURN_IF_ERROR(c->Subshape(params_dense_values, 1, &value)); + TF_RETURN_IF_ERROR(c->Concatenate(c->UnknownShapeOfRank(1), value, &values)); + c->set_output(num_splits, values); + + return Status::OK(); +} + +} // namespace tensorflow -- GitLab From 1e13c38980ec17d9f26c041f4b251ecb3a791a2c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 13:23:52 -0700 Subject: [PATCH 134/411] Update ops-related pbtxt files. PiperOrigin-RevId: 216410913 --- .../core/ops/compat/ops_history.v1.pbtxt | 98 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 98 +++++++++++++++++++ 2 files changed, 196 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index dcea70dffb..cfb1055d3c 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -21858,6 +21858,54 @@ op { } is_stateful: true } +op { + name: "ExperimentalNumaMapAndBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + input_arg { + name: "num_parallel_calls" + type: DT_INT64 + } + input_arg { + name: "drop_remainder" + type: DT_BOOL + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: 
"output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "ExperimentalThreadPoolDataset" input_arg { @@ -43915,6 +43963,56 @@ op { } } } +op { + name: "RaggedGather" + input_arg { + name: "params_nested_splits" + type: DT_INT64 + number_attr: "PARAMS_RAGGED_RANK" + } + input_arg { + name: "params_dense_values" + type_attr: "Tvalues" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + output_arg { + name: "output_nested_splits" + type: DT_INT64 + number_attr: "OUTPUT_RAGGED_RANK" + } + output_arg { + name: "output_dense_values" + type_attr: "Tvalues" + } + attr { + name: "Tvalues" + type: "type" + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "PARAMS_RAGGED_RANK" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "OUTPUT_RAGGED_RANK" + type: "int" + has_minimum: true + } +} op { name: "RandomCrop" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 93a297458f..05b97bffad 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -10365,6 +10365,54 @@ op { } is_stateful: true } +op { + name: "ExperimentalNumaMapAndBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + input_arg { + name: "num_parallel_calls" + type: DT_INT64 + } + input_arg { + name: "drop_remainder" + type: DT_BOOL + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "ExperimentalThreadPoolDataset" input_arg { @@ -22288,6 
+22336,56 @@ op { } } } +op { + name: "RaggedGather" + input_arg { + name: "params_nested_splits" + type: DT_INT64 + number_attr: "PARAMS_RAGGED_RANK" + } + input_arg { + name: "params_dense_values" + type_attr: "Tvalues" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + output_arg { + name: "output_nested_splits" + type: DT_INT64 + number_attr: "OUTPUT_RAGGED_RANK" + } + output_arg { + name: "output_dense_values" + type_attr: "Tvalues" + } + attr { + name: "Tvalues" + type: "type" + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "PARAMS_RAGGED_RANK" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "OUTPUT_RAGGED_RANK" + type: "int" + has_minimum: true + } +} op { name: "RandomCrop" input_arg { -- GitLab From 9989788be25c846d087ac70b76cf78759a209a3e Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 9 Oct 2018 13:31:58 -0700 Subject: [PATCH 135/411] Small cleanup in function_test. 
PiperOrigin-RevId: 216412380 --- tensorflow/python/framework/function_test.py | 27 ++++++-------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 87f567db0e..16d4903d79 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -1639,29 +1639,18 @@ class FunctionInlineControlTest(test.TestCase): self.assertEqual(MetadataHasCell(run_metadata), noinline) -@function.Defun(*[dtypes.float32] * 3) -def Linear(w, b, x): - return nn_ops.relu(math_ops.matmul(x, w) + b) - - -@function.Defun(*[dtypes.float32] * 5) -def Linear2(w1, b1, w2, b2, x): - return Linear(w2, b2, Linear(w1, b1, x)) - - -@function.Defun(*[dtypes.float32] * 3) -def LinearWithCApi(w, b, x): - return nn_ops.relu(math_ops.matmul(x, w) + b) - +class ModuleFunctionTest(test.TestCase): -@function.Defun(*[dtypes.float32] * 5) -def Linear2WithCApi(w1, b1, w2, b2, x): - return LinearWithCApi(w2, b2, LinearWithCApi(w1, b1, x)) + def testBasic(self): + @function.Defun(*[dtypes.float32] * 3) + def LinearWithCApi(w, b, x): + return nn_ops.relu(math_ops.matmul(x, w) + b) -class ModuleFunctionTest(test.TestCase): + @function.Defun(*[dtypes.float32] * 5) + def Linear2WithCApi(w1, b1, w2, b2, x): + return LinearWithCApi(w2, b2, LinearWithCApi(w1, b1, x)) - def testBasic(self): with ops.Graph().as_default(): a, b, c, d, e = [ constant_op.constant([[_]], dtype=dtypes.float32) for _ in range(5) -- GitLab From 761298537adab7196d4f24fa07384f4cd6ffae91 Mon Sep 17 00:00:00 2001 From: Scott Leishman Date: Tue, 9 Oct 2018 20:39:01 +0000 Subject: [PATCH 136/411] Ensure all bazel options are incorporated during Intel mkl builds. 
--- tensorflow/tools/docker/Dockerfile.devel-mkl | 4 +++- tensorflow/tools/docker/Dockerfile.devel-mkl-horovod | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl index e433e9ebb2..e664b6066b 100755 --- a/tensorflow/tools/docker/Dockerfile.devel-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-mkl @@ -115,6 +115,7 @@ RUN export TAG_PREFIX="v" && \ fi RUN yes "" | ${PYTHON} configure.py +RUN cp .bazelrc /root/.bazelrc ENV CI_BUILD_PYTHON ${PYTHON} @@ -125,7 +126,8 @@ ENV CI_BUILD_PYTHON ${PYTHON} # --copt=-march="avx" \ # For haswell, broadwell, or skylake # --copt=-march="avx2" \ -COPY .bazelrc /root/.bazelrc +COPY .bazelrc /root/.mkl.bazelrc +RUN echo "import /root/.mkl.bazelrc" >>/root/.bazelrc RUN tensorflow/tools/ci_build/builds/configured CPU \ bazel --bazelrc=/root/.bazelrc build -c opt \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod index 48f2400569..136c775d6c 100755 --- a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod +++ b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod @@ -106,6 +106,7 @@ RUN export TAG_PREFIX="v" && \ fi RUN yes "" | ${PYTHON} configure.py +RUN cp .bazelrc /root/.bazelrc ENV CI_BUILD_PYTHON ${PYTHON} @@ -116,7 +117,8 @@ ENV CI_BUILD_PYTHON ${PYTHON} # --copt=-march="avx" \ # For haswell, broadwell, or skylake # --copt=-march="avx2" \ -COPY .bazelrc /root/.bazelrc +COPY .bazelrc /root/.mkl.bazelrc +RUN echo "import /root/.mkl.bazelrc" >>/root/.bazelrc RUN tensorflow/tools/ci_build/builds/configured CPU \ bazel --bazelrc=/root/.bazelrc build -c opt \ -- GitLab From 5d9a7fdf4f02c2db487a03e7ad2d520f8847c4e3 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 9 Oct 2018 13:32:24 -0700 Subject: [PATCH 137/411] [XLA:GPU] Add an implementation of scatter for GPU This simple has a kernel that runs on every element of the updates tensor, figure 
out the right indices to perform the update, and applies it with an atomic operation. Currently we emit a CAS for plain (i.e. non-add) updates, which is inefficient. Also TuplePointsToAnalysis doesn't know that it should alias the operand and output buffers of a scatter, which would avoid a copy. PiperOrigin-RevId: 216412467 --- tensorflow/compiler/xla/service/gpu/BUILD | 1 - .../xla/service/gpu/ir_emitter_unnested.cc | 141 ++++++++++++++++++ .../xla/service/gpu/ir_emitter_unnested.h | 1 + .../xla/service/gpu/nvptx_compiler.cc | 3 - .../compiler/xla/service/layout_assignment.cc | 2 +- 5 files changed, 143 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 350fd32537..0144d59097 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -705,7 +705,6 @@ cc_library( "//tensorflow/compiler/xla/service:llvm_compiler", "//tensorflow/compiler/xla/service:reduce_precision_insertion", "//tensorflow/compiler/xla/service:reshape_mover", - "//tensorflow/compiler/xla/service:scatter_expander", "//tensorflow/compiler/xla/service:transpose_folding", "//tensorflow/compiler/xla/service:tuple_simplifier", "//tensorflow/compiler/xla/service:while_loop_constant_sinking", diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index c792dd2ddb..bef7a55301 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -1958,6 +1958,147 @@ Status IrEmitterUnnested::HandleRng(HloInstruction* rng) { return Status::OK(); } +Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { + const HloInstruction* operand = scatter->operand(0); + const HloInstruction* scatter_indices = scatter->operand(1); + const HloInstruction* updates = scatter->operand(2); + const ScatterDimensionNumbers& dim_numbers = + 
scatter->scatter_dimension_numbers(); + CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape())); + + std::vector> thunks; + + // Copy the operand into the output if it's not the same buffer already. + auto operand_buffer = GetAllocationSlice(*operand); + auto destination_buffer = GetAllocationSlice(*scatter); + if (operand_buffer != destination_buffer) { + thunks.push_back(absl::make_unique( + /*source_address=*/operand_buffer, + /*destination_buffer=*/destination_buffer, + /*mem_size=*/ShapeUtil::ByteSizeOf(operand->shape()), scatter)); + } + + auto loop_body_emitter = [&](const IrArray::Index& index) -> Status { + std::vector raw_window_multidim; + std::vector input_scatter_multidim; + std::vector raw_window_bounds; + + // Partition the index into window indices and scatter indices. + for (int64 i = 0, e = index.size(); i != e; ++i) { + // For window indices also remember the window size, this comes in handy + // later. + if (absl::c_binary_search(dim_numbers.update_window_dims(), i)) { + raw_window_multidim.push_back(index[i]); + raw_window_bounds.push_back(updates->shape().dimensions(i)); + } else { + input_scatter_multidim.push_back(index[i]); + } + } + DCHECK_EQ(raw_window_multidim.size(), + dim_numbers.update_window_dims_size()); + + // Apply inserted_window_dims to the window dimensions. + int64 raw_window_multidim_idx = 0; + std::vector input_window_multidim; + std::vector input_window_bounds; + for (int64 i = 0, e = ShapeUtil::Rank(operand->shape()); i != e; ++i) { + if (absl::c_binary_search(dim_numbers.inserted_window_dims(), i)) { + input_window_bounds.push_back(1); // Trivial dimension. 
+ input_window_multidim.push_back(index.GetConstantWithIndexType(0)); + } else { + input_window_bounds.push_back( + raw_window_bounds[raw_window_multidim_idx]); + input_window_multidim.push_back( + raw_window_multidim[raw_window_multidim_idx]); + ++raw_window_multidim_idx; + } + } + DCHECK_EQ(input_window_multidim.size(), ShapeUtil::Rank(operand->shape())); + + // Insert a 1 dimension at the end if index_vector_dim requests one. + Shape scatter_indices_shape = scatter_indices->shape(); + if (dim_numbers.index_vector_dim() == + ShapeUtil::Rank(scatter_indices_shape)) { + scatter_indices_shape.add_dimensions(1); + scatter_indices_shape.mutable_layout()->add_minor_to_major( + dim_numbers.index_vector_dim()); + } + llvm_ir::IrArray scatter_indices_reshaped = + GetIrArray(*scatter_indices, *scatter) + .CastToShape(scatter_indices_shape, &b_); + + // Now load the indices corresponding to the current window from + // scatter_indices. + llvm_ir::IrArray::Index raw_scatter_index_index(input_scatter_multidim, + index.GetType()); + raw_scatter_index_index.InsertAt(dim_numbers.index_vector_dim(), nullptr); + llvm::Value* is_in_bounds = b_.getTrue(); + for (int64 i = 0, e = dim_numbers.scatter_dims_to_operand_dims_size(); + i != e; ++i) { + // Our index is stored along index_vector_dim, insert that into the lookup + // index into scatter_indices. + raw_scatter_index_index[dim_numbers.index_vector_dim()] = + raw_scatter_index_index.GetConstantWithIndexType(i); + + int64 operand_dim = dim_numbers.scatter_dims_to_operand_dims(i); + llvm::Value* loaded_scatter_index = + scatter_indices_reshaped.EmitReadArrayElement(raw_scatter_index_index, + &b_, "scatter_index"); + // And add the index to our window index. This yields the output index. + llvm::Value* dim_offset = + Add(input_window_multidim[operand_dim], + IntCast(loaded_scatter_index, index.GetType(), + /*isSigned=*/true)); + input_window_multidim[operand_dim] = dim_offset; + + // Also do the bounds check now. 
+ int64 max_index = operand->shape().dimensions(operand_dim) - + input_window_bounds[operand_dim] + 1; + // is_in_bounds = dim_offset >= 0 && dim_offset < dim_size-window_size+1 + // --> dim_offset u< dim_size-window_size+1 + is_in_bounds = + And(is_in_bounds, + ICmpULT(dim_offset, index.GetConstantWithIndexType(max_index))); + } + + llvm_ir::LlvmIfData if_window_in_bounds_data = llvm_ir::EmitIfThenElse( + is_in_bounds, "scatter.in_bounds", &b_, /*emit_else=*/false); + llvm_ir::SetToFirstInsertPoint(if_window_in_bounds_data.true_block, &b_); + // All done, now just read from the calculated input from the window, and do + // an atomic store to the calculated location in the output. + llvm_ir::IrArray::Index input_window_index(input_window_multidim, + index.GetType()); + llvm::Value* input_address = + GetIrArray(*updates, *scatter).EmitArrayElementAddress(index, &b_); + llvm::Value* output_address = + GetIrArray(*scatter, *scatter) + .EmitArrayElementAddress(input_window_index, &b_); + return EmitAtomicOperationForNestedComputation( + *scatter->to_apply(), output_address, input_address); + }; + + // Launch a kernel that reads every element in the updates tensor. We could + // also do one kernel per window instead if bounds checks turn out to be a + // bottleneck. 
+ thunks.push_back(BuildKernelThunk( + scatter, + /*implements_whole_instruction=*/operand_buffer == destination_buffer)); + + LaunchDimensions launch_dimensions = CalculateLaunchDimensions( + updates->shape(), ir_emitter_context_->device_description()); + UpdateLaunchDimensions(launch_dimensions, + static_cast(thunks.back().get()), + ir_emitter_context_->llvm_module()); + + thunk_sequence_->emplace_back( + absl::make_unique(std::move(thunks), scatter)); + return ParallelLoopEmitter(loop_body_emitter, updates->shape(), + launch_dimensions, &b_) + .EmitLoop(IrName(scatter), + GetIndexTypeForKernel(scatter, launch_dimensions.launch_bound(), + &b_)); +} + Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { thunk_sequence_->push_back( BuildKernelThunk(select, /*implements_whole_instruction=*/true)); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index bd5db72051..2e36e7235b 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -76,6 +76,7 @@ class IrEmitterUnnested : public IrEmitter { Status HandleInfeed(HloInstruction* xla_infeed) override; Status HandleOutfeed(HloInstruction* outfeed) override; Status HandleRng(HloInstruction* random) override; + Status HandleScatter(HloInstruction* scatter) override; Status HandleSelect(HloInstruction* select) override; Status HandleSort(HloInstruction* sort) override; Status HandleTupleSelect(HloInstruction* tuple_select) override; diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index ac6c2c5565..5409f65589 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -75,7 +75,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h" #include "tensorflow/compiler/xla/service/reshape_mover.h" -#include "tensorflow/compiler/xla/service/scatter_expander.h" #include "tensorflow/compiler/xla/service/transpose_folding.h" #include "tensorflow/compiler/xla/service/tuple_simplifier.h" #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" @@ -176,8 +175,6 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // elimination has to come after that pass. pipeline.AddPass(); - pipeline.AddPass(); - pass.AddPass( /*is_layout_sensitive=*/false, [](const Shape&, const Shape&) { return false; }); diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index ad65b147c1..2cf5fc94ac 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -1908,6 +1908,7 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kRemainder: case HloOpcode::kReverse: case HloOpcode::kRoundNearestAfz: + case HloOpcode::kScatter: case HloOpcode::kSelect: case HloOpcode::kSelectAndScatter: case HloOpcode::kShiftLeft: @@ -1946,7 +1947,6 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kReduce: case HloOpcode::kReshape: case HloOpcode::kRng: - case HloOpcode::kScatter: case HloOpcode::kSend: case HloOpcode::kSendDone: case HloOpcode::kAfterAll: -- GitLab From eaaa3cebc72766dc55e5db5c8cb53fca0d1d0215 Mon Sep 17 00:00:00 2001 From: Muhammad Wildan Date: Wed, 10 Oct 2018 03:41:38 +0700 Subject: [PATCH 138/411] Update README.md --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 57efb876c9..c582cf873c 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,12 @@ subscribing to 
[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). ## Installation -*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.* +for install current release for CPU-only: +pip install tensorflow + +GPU package for CUDA-enabled GPU cards: +pip install tensorflow-gpu + People who are a little more adventurous can also try our nightly binaries: -- GitLab From 7e11278b5905fd2252e7c0ec245cde4af5c67c51 Mon Sep 17 00:00:00 2001 From: Muhammad Wildan Date: Wed, 10 Oct 2018 03:42:18 +0700 Subject: [PATCH 139/411] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index c582cf873c..62edc4c0b2 100644 --- a/README.md +++ b/README.md @@ -30,9 +30,11 @@ subscribing to ## Installation for install current release for CPU-only: + pip install tensorflow GPU package for CUDA-enabled GPU cards: + pip install tensorflow-gpu -- GitLab From 56a14850210374491a09506b987b02038ae2b03e Mon Sep 17 00:00:00 2001 From: Muhammad Wildan Date: Wed, 10 Oct 2018 03:43:24 +0700 Subject: [PATCH 140/411] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 62edc4c0b2..9f3431fc4b 100644 --- a/README.md +++ b/README.md @@ -31,11 +31,11 @@ subscribing to ## Installation for install current release for CPU-only: -pip install tensorflow +*pip install tensorflow* GPU package for CUDA-enabled GPU cards: -pip install tensorflow-gpu +*pip install tensorflow-gpu* People who are a little more adventurous can also try our nightly binaries: -- GitLab From 2499e8d4e0d960b96fe049c0f299c2d034305edf Mon Sep 17 00:00:00 2001 From: Muhammad Wildan Date: Wed, 10 Oct 2018 03:44:06 +0700 Subject: [PATCH 141/411] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 9f3431fc4b..34406f4ed7 100644 --- 
a/README.md +++ b/README.md @@ -38,6 +38,9 @@ GPU package for CUDA-enabled GPU cards: *pip install tensorflow-gpu* + + + People who are a little more adventurous can also try our nightly binaries: **Nightly pip packages** -- GitLab From 7b2f26280df8dee266d66e01a7ffac7a7eb25247 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 13:51:27 -0700 Subject: [PATCH 142/411] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 216416117 --- tensorflow/go/op/wrappers.go | 728 +++++++++++++++++------------------ 1 file changed, 364 insertions(+), 364 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index eb6df2af46..f35117084a 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -4396,6 +4396,172 @@ func Snapshot(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Forwards `data` to the output port determined by `pred`. +// +// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise, +// the data goes to `output_false`. +// +// See also `RefSwitch` and `Merge`. +// +// Arguments: +// data: The tensor to be forwarded to the appropriate output. +// pred: A scalar that specifies which output port will receive data. +// +// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output. +func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Switch", + Input: []tf.Input{ + data, pred, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// AudioSpectrogramAttr is an optional argument to AudioSpectrogram. +type AudioSpectrogramAttr func(optionalAttr) + +// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value. 
+// +// value: Whether to return the squared magnitude or just the +// magnitude. Using squared magnitude can avoid extra calculations. +// If not specified, defaults to false +func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr { + return func(m optionalAttr) { + m["magnitude_squared"] = value + } +} + +// Produces a visualization of audio data over time. +// +// Spectrograms are a standard way of representing audio information as a series of +// slices of frequency information, one slice for each window of time. By joining +// these together into a sequence, they form a distinctive fingerprint of the sound +// over time. +// +// This op expects to receive audio data as an input, stored as floats in the range +// -1 to 1, together with a window width in samples, and a stride specifying how +// far to move the window between slices. From this it generates a three +// dimensional output. The lowest dimension has an amplitude value for each +// frequency during that time slice. The next dimension is time, with successive +// frequency slices. The final dimension is for the channels in the input, so a +// stereo audio input would have two here for example. +// +// This means the layout when converted and saved as an image is rotated 90 degrees +// clockwise from a typical spectrogram. Time is descending down the Y axis, and +// the frequency decreases from left to right. +// +// Each value in the result represents the square root of the sum of the real and +// imaginary parts of an FFT on the current window of samples. In this way, the +// lowest dimension represents the power of each frequency in the current window, +// and adjacent windows are concatenated in the next dimension. +// +// To get a more intuitive and visual look at what this operation does, you can run +// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the +// resulting spectrogram as a PNG image. 
+// +// Arguments: +// input: Float representation of audio data. +// window_size: How wide the input window is in samples. For the highest efficiency +// this should be a power of two, but other values are accepted. +// stride: How widely apart the center of adjacent sample windows should be. +// +// Returns 3D representation of the audio frequencies as an image. +func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"window_size": window_size, "stride": stride} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AudioSpectrogram", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder. +type CTCBeamSearchDecoderAttr func(optionalAttr) + +// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value. +// +// value: If true, merge repeated classes in output. +// If not specified, defaults to true +func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr { + return func(m optionalAttr) { + m["merge_repeated"] = value + } +} + +// Performs beam search decoding on the logits given in input. +// +// A note about the attribute merge_repeated: For the beam search decoder, +// this means that if consecutive entries in a beam are the same, only +// the first of these is emitted. That is, when the top path is "A B B B B", +// "A B" is returned if merge_repeated = True but "A B B B B" is +// returned if merge_repeated = False. +// +// Arguments: +// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. +// sequence_length: A vector containing sequence lengths, size `(batch)`. +// beam_width: A scalar >= 0 (beam search beam width). 
+// top_paths: A scalar >= 0, <= beam_width (controls output size). +// +// Returns A list (length: top_paths) of indices matrices. Matrix j, +// size `(total_decoded_outputs[j] x 2)`, has indices of a +// `SparseTensor`. The rows store: [batch, time].A list (length: top_paths) of values vectors. Vector j, +// size `(length total_decoded_outputs[j])`, has the values of a +// `SparseTensor`. The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector. Vector j, +// size `(2)`, stores the shape of the decoded `SparseTensor[j]`. +// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`. The +// sequence log-probabilities. +func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "CTCBeamSearchDecoder", + Input: []tf.Input{ + inputs, sequence_length, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil { + scope.UpdateErr("CTCBeamSearchDecoder", err) + return + } + if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil { + scope.UpdateErr("CTCBeamSearchDecoder", err) + return + } + if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil { + scope.UpdateErr("CTCBeamSearchDecoder", err) + return + } + log_probability = op.Output(idx) + return decoded_indices, decoded_values, decoded_shape, log_probability +} + // ResourceStridedSliceAssignAttr is an optional argument to 
ResourceStridedSliceAssign. type ResourceStridedSliceAssignAttr func(optionalAttr) @@ -5662,90 +5828,6 @@ func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_i return op.Output(0) } -// Computes natural logarithm of (1 + x) element-wise. -// -// I.e., \\(y = \log_e (1 + x)\\). -func Log1p(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Log1p", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes rectified linear 6 gradients for a Relu6 operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Relu6 operation. -// features: The features passed as input to the corresponding Relu6 operation, or -// its output; using either one produces the same result. -// -// Returns The gradients: -// `gradients * (features > 0) * (features < 6)`. -func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Relu6Grad", - Input: []tf.Input{ - gradients, features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResizeBicubicAttr is an optional argument to ResizeBicubic. -type ResizeBicubicAttr func(optionalAttr) - -// ResizeBicubicAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// Resize `images` to `size` using bicubic interpolation. -// -// Input images can be of different types but output images are always float. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. 
-// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBicubic", - Input: []tf.Input{ - images, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes natural logarithm of x element-wise. // // I.e., \\(y = \log_e x\\). @@ -5886,146 +5968,6 @@ func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// AudioSpectrogramAttr is an optional argument to AudioSpectrogram. -type AudioSpectrogramAttr func(optionalAttr) - -// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value. -// -// value: Whether to return the squared magnitude or just the -// magnitude. Using squared magnitude can avoid extra calculations. -// If not specified, defaults to false -func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr { - return func(m optionalAttr) { - m["magnitude_squared"] = value - } -} - -// Produces a visualization of audio data over time. -// -// Spectrograms are a standard way of representing audio information as a series of -// slices of frequency information, one slice for each window of time. By joining -// these together into a sequence, they form a distinctive fingerprint of the sound -// over time. -// -// This op expects to receive audio data as an input, stored as floats in the range -// -1 to 1, together with a window width in samples, and a stride specifying how -// far to move the window between slices. From this it generates a three -// dimensional output. 
The lowest dimension has an amplitude value for each -// frequency during that time slice. The next dimension is time, with successive -// frequency slices. The final dimension is for the channels in the input, so a -// stereo audio input would have two here for example. -// -// This means the layout when converted and saved as an image is rotated 90 degrees -// clockwise from a typical spectrogram. Time is descending down the Y axis, and -// the frequency decreases from left to right. -// -// Each value in the result represents the square root of the sum of the real and -// imaginary parts of an FFT on the current window of samples. In this way, the -// lowest dimension represents the power of each frequency in the current window, -// and adjacent windows are concatenated in the next dimension. -// -// To get a more intuitive and visual look at what this operation does, you can run -// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the -// resulting spectrogram as a PNG image. -// -// Arguments: -// input: Float representation of audio data. -// window_size: How wide the input window is in samples. For the highest efficiency -// this should be a power of two, but other values are accepted. -// stride: How widely apart the center of adjacent sample windows should be. -// -// Returns 3D representation of the audio frequencies as an image. -func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"window_size": window_size, "stride": stride} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSpectrogram", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder. 
-type CTCBeamSearchDecoderAttr func(optionalAttr) - -// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value. -// -// value: If true, merge repeated classes in output. -// If not specified, defaults to true -func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr { - return func(m optionalAttr) { - m["merge_repeated"] = value - } -} - -// Performs beam search decoding on the logits given in input. -// -// A note about the attribute merge_repeated: For the beam search decoder, -// this means that if consecutive entries in a beam are the same, only -// the first of these is emitted. That is, when the top path is "A B B B B", -// "A B" is returned if merge_repeated = True but "A B B B B" is -// returned if merge_repeated = False. -// -// Arguments: -// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -// sequence_length: A vector containing sequence lengths, size `(batch)`. -// beam_width: A scalar >= 0 (beam search beam width). -// top_paths: A scalar >= 0, <= beam_width (controls output size). -// -// Returns A list (length: top_paths) of indices matrices. Matrix j, -// size `(total_decoded_outputs[j] x 2)`, has indices of a -// `SparseTensor`. The rows store: [batch, time].A list (length: top_paths) of values vectors. Vector j, -// size `(length total_decoded_outputs[j])`, has the values of a -// `SparseTensor`. The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector. Vector j, -// size `(2)`, stores the shape of the decoded `SparseTensor[j]`. -// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`. The -// sequence log-probabilities. 
-func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CTCBeamSearchDecoder", - Input: []tf.Input{ - inputs, sequence_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return - } - if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return - } - if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return - } - log_probability = op.Output(idx) - return decoded_indices, decoded_values, decoded_shape, log_probability -} - // MatrixInverseAttr is an optional argument to MatrixInverse. type MatrixInverseAttr func(optionalAttr) @@ -9615,25 +9557,109 @@ func DecodeRawLittleEndian(value bool) DecodeRawAttr { // Reinterpret the bytes of a string as a vector of numbers. // -// Arguments: -// bytes: All the elements must have the same length. +// Arguments: +// bytes: All the elements must have the same length. +// +// +// Returns A Tensor with one more dimension than the input `bytes`. The +// added dimension will have size equal to the length of the elements +// of `bytes` divided by the number of bytes to represent `out_type`. 
+func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"out_type": out_type} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DecodeRaw", + Input: []tf.Input{ + bytes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes natural logarithm of (1 + x) element-wise. +// +// I.e., \\(y = \log_e (1 + x)\\). +func Log1p(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Log1p", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes rectified linear 6 gradients for a Relu6 operation. +// +// Arguments: +// gradients: The backpropagated gradients to the corresponding Relu6 operation. +// features: The features passed as input to the corresponding Relu6 operation, or +// its output; using either one produces the same result. +// +// Returns The gradients: +// `gradients * (features > 0) * (features < 6)`. +func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Relu6Grad", + Input: []tf.Input{ + gradients, features, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResizeBicubicAttr is an optional argument to ResizeBicubic. +type ResizeBicubicAttr func(optionalAttr) + +// ResizeBicubicAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. 
+// If not specified, defaults to false +func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr { + return func(m optionalAttr) { + m["align_corners"] = value + } +} + +// Resize `images` to `size` using bicubic interpolation. +// +// Input images can be of different types but output images are always float. // +// Arguments: +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. // -// Returns A Tensor with one more dimension than the input `bytes`. The -// added dimension will have size equal to the length of the elements -// of `bytes` divided by the number of bytes to represent `out_type`. -func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeRaw", + Type: "ResizeBicubic", Input: []tf.Input{ - bytes, + images, size, }, Attrs: attrs, } @@ -9641,6 +9667,52 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ... return op.Output(0) } +// Greedily selects a subset of bounding boxes in descending order of score, +// +// pruning away boxes that have high intersection-over-union (IOU) overlap +// with previously selected boxes. Bounding boxes are supplied as +// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any +// diagonal pair of box corners and the coordinates can be provided as normalized +// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm +// is agnostic to where the origin is in the coordinate system. 
Note that this +// algorithm is invariant to orthogonal transformations and translations +// of the coordinate system; thus translating or reflections of the coordinate +// system result in the same boxes being selected by the algorithm. +// +// The output of this operation is a set of integers indexing into the input +// collection of bounding boxes representing the selected boxes. The bounding +// box coordinates corresponding to the selected indices can then be obtained +// using the `tf.gather operation`. For example: +// +// selected_indices = tf.image.non_max_suppression_v2( +// boxes, scores, max_output_size, iou_threshold) +// selected_boxes = tf.gather(boxes, selected_indices) +// +// Arguments: +// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. +// scores: A 1-D float tensor of shape `[num_boxes]` representing a single +// score corresponding to each box (each row of boxes). +// max_output_size: A scalar integer tensor representing the maximum number of +// boxes to be selected by non max suppression. +// iou_threshold: A 0-D float tensor representing the threshold for deciding whether +// boxes overlap too much with respect to IOU. +// +// Returns A 1-D integer tensor of shape `[M]` representing the selected +// indices from the boxes tensor, where `M <= max_output_size`. +func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "NonMaxSuppressionV2", + Input: []tf.Input{ + boxes, scores, max_output_size, iou_threshold, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // RandomShuffleAttr is an optional argument to RandomShuffle. type RandomShuffleAttr func(optionalAttr) @@ -19332,65 +19404,6 @@ func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_ return op.Output(0) } -// Computes the sum along segments of a tensor. 
-// -// Read -// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \sum_j data_j\\) where sum is over `j` such -// that `segment_ids[j] == i`. -// -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentSum", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits the lines of one or more text files. -// -// Arguments: -// filenames: A scalar or a vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar containing the number of bytes to buffer. -func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TextLineDataset", - Input: []tf.Input{ - filenames, compression_type, buffer_size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns the set of files matching one or more glob patterns. // // Note that this routine only supports wildcard characters in the @@ -21888,6 +21901,65 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp return op.Output(0), op.Output(1), op.Output(2) } +// Creates a dataset that emits the lines of one or more text files. +// +// Arguments: +// filenames: A scalar or a vector containing the name(s) of the file(s) to be +// read. +// compression_type: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// buffer_size: A scalar containing the number of bytes to buffer. 
+func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TextLineDataset", + Input: []tf.Input{ + filenames, compression_type, buffer_size, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sum along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output_i = \sum_j data_j\\) where sum is over `j` such +// that `segment_ids[j] == i`. +// +// If the sum is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentSum", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the mean along segments of a tensor. // // Read @@ -27977,52 +28049,6 @@ func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) return op.Output(0) } -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Note that this -// algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. 
For example: -// -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) -// -// Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV2", - Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Greedily selects a subset of bounding boxes in descending order of score, // // pruning away boxes that have high intersection-over-union (IOU) overlap @@ -33131,29 +33157,3 @@ func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, op := scope.AddOperation(opspec) return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } - -// Forwards `data` to the output port determined by `pred`. -// -// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise, -// the data goes to `output_false`. -// -// See also `RefSwitch` and `Merge`. -// -// Arguments: -// data: The tensor to be forwarded to the appropriate output. -// pred: A scalar that specifies which output port will receive data. 
-// -// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output. -func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Switch", - Input: []tf.Input{ - data, pred, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} -- GitLab From 1f556d3a4172c30cf461e7e66334b70ffad2d559 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 9 Oct 2018 14:03:23 -0700 Subject: [PATCH 143/411] Do not create a graph as a global variable in tests. PiperOrigin-RevId: 216418324 --- .../copy_graph/python/util/copy_test.py | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/copy_graph/python/util/copy_test.py b/tensorflow/contrib/copy_graph/python/util/copy_test.py index ba97c78456..4d8651a79f 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_test.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_test.py @@ -26,15 +26,16 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test -graph1 = ops.Graph() -graph2 = ops.Graph() - class CopyVariablesTest(test.TestCase): + def setUp(self): + self.graph1 = ops.Graph() + self.graph2 = ops.Graph() + def testVariableCopy(self): - with graph1.as_default(): + with self.graph1.as_default(): #Define a Variable in graph1 some_var = variables.VariableV1(2) #Initialize session @@ -43,13 +44,15 @@ class CopyVariablesTest(test.TestCase): variables.global_variables_initializer().run(session=sess1) #Make a copy of some_var in the defsult scope in graph2 - copy1 = copy_elements.copy_variable_to_graph(some_var, graph2) + copy1 = copy_elements.copy_variable_to_graph(some_var, self.graph2) #Make another copy with different scope - copy2 = copy_elements.copy_variable_to_graph(some_var, graph2, 
"test_scope") + copy2 = copy_elements.copy_variable_to_graph(some_var, + self.graph2, + "test_scope") #Initialize both the copies - with graph2.as_default(): + with self.graph2.as_default(): #Initialize Session sess2 = session_lib.Session() #Initialize the Variables @@ -67,9 +70,13 @@ class CopyVariablesTest(test.TestCase): class CopyOpsTest(test.TestCase): + def setUp(self): + self.graph1 = ops.Graph() + self.graph2 = ops.Graph() + def testOpsCopy(self): - with graph1.as_default(): + with self.graph1.as_default(): #Initialize a basic expression y = ax + b x = array_ops.placeholder("float") a = variables.VariableV1(3.0) @@ -82,21 +89,21 @@ class CopyOpsTest(test.TestCase): variables.global_variables_initializer().run(session=sess1) #First, initialize a as a Variable in graph2 - a1 = copy_elements.copy_variable_to_graph(a, graph2) + a1 = copy_elements.copy_variable_to_graph(a, self.graph2) #Initialize a1 in graph2 - with graph2.as_default(): + with self.graph2.as_default(): #Initialize session sess2 = session_lib.Session() #Initialize the Variable variables.global_variables_initializer().run(session=sess2) #Initialize a copy of y in graph2 - y1 = copy_elements.copy_op_to_graph(y, graph2, [a1]) + y1 = copy_elements.copy_op_to_graph(y, self.graph2, [a1]) #Now that y has been copied, x must be copied too. #Get that instance - x1 = copy_elements.get_copied_op(x, graph2) + x1 = copy_elements.get_copied_op(x, self.graph2) #Compare values of y & y1 for a sample input #and check if they match -- GitLab From 5785c0202f4f84c464ef22d0ff180730813f59f3 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 9 Oct 2018 14:04:23 -0700 Subject: [PATCH 144/411] Improve the control flow conversion for loops by using dataflow analysis to construct the state. This is part of a larger refactoring which removes the reliance on the deprecated Scope.created field. 
PiperOrigin-RevId: 216418556 --- .../autograph/converters/control_flow.py | 162 ++++++++++-------- .../autograph/converters/control_flow_test.py | 4 +- .../python/autograph/pyct/qual_names.py | 3 + 3 files changed, 93 insertions(+), 76 deletions(-) diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py index 416a60d2ee..70879f6c97 100644 --- a/tensorflow/python/autograph/converters/control_flow.py +++ b/tensorflow/python/autograph/converters/control_flow.py @@ -90,23 +90,11 @@ class ControlFlowTransformer(converter.Base): return templates.replace( template, test=test, body_name=body_name, orelse_name=orelse_name) - def _fmt_symbol_list(self, symbol_set): + def _fmt_symbols(self, symbol_set): if not symbol_set: return 'no variables' return ', '.join(map(str, symbol_set)) - def _validate_no_live_vars_created(self, node): - body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) - live_vars_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT) - live_vars_created_in_body = live_vars_out & body_scope.created - if live_vars_created_in_body: - raise ValueError( - 'The following variables are created inside the loop and used later:' - '\n%s\n' - 'Variables must be declared outside loops because loops may not' - ' necessarily execute.' % self._fmt_symbol_list( - live_vars_created_in_body)) - def visit_If(self, node): node = self.generic_visit(node) @@ -138,8 +126,8 @@ class ControlFlowTransformer(converter.Base): ' creates %s, while the false branch creates %s. Make sure all' ' these variables are initialized either in both' ' branches or before the if statement.' % - (self._fmt_symbol_list(created_in_body), - self._fmt_symbol_list(created_in_orelse))) + (self._fmt_symbols(created_in_body), + self._fmt_symbols(created_in_orelse))) # Alias the closure variables inside the conditional functions, to allow # the functions access to the respective variables. 
@@ -206,51 +194,97 @@ class ControlFlowTransformer(converter.Base): return body_def + orelse_def + cond_expr - def visit_While(self, node): - self.generic_visit(node) - - self._validate_no_live_vars_created(node) - + def _get_loop_state(self, node): body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) - body_closure = body_scope.modified - body_scope.created - all_referenced = body_scope.referenced - - cond_scope = anno.getanno(node, annos.NodeAnno.COND_SCOPE) - cond_closure = set() - for s in cond_scope.used: - for root in s.support_set: - if root not in body_scope.created: - cond_closure.add(root) - - state = list(body_closure) - if not state: + defined_in = anno.getanno(node, anno.Static.DEFINED_VARS_IN) + live_in = anno.getanno(node, anno.Static.LIVE_VARS_IN) + live_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT) + reserved_symbols = body_scope.referenced + + # Note that it doesn't matter whether the variables are live after the loop. + # If the loop modifies them nonlocally (e.g. the result of an iteration + # depends on the previous iteration), then they need to be included in + # the loop state, regardless of whether they are later used or not. + loop_state = body_scope.modified & live_in + + undefined_lives = loop_state - defined_in + # Only simple variables must be defined. The composite ones will be + # implicitly checked at runtime. + undefined_simple_lives = {v for v in undefined_lives if v.is_simple()} + if undefined_simple_lives: + raise NameError( + 'cannot convert loop: it includes symbols that are undefined' + ' when entering the loop: {}'.format( + self._fmt_symbols(undefined_simple_lives))) + + live_defs_in_loop = (body_scope.modified - live_in) & live_out + if live_defs_in_loop: + # TODO(mdan): Include reference to explanation why. + raise NotImplementedError( + 'cannot convert loop: it includes symbols that are defined' + ' inside the loop, but used later: {}. 
To fix, initialize' + ' these symbols before the loop'.format( + self._fmt_symbols(live_defs_in_loop))) + + if not loop_state: # TODO(mdan): Implement this properly. - # To complete this statement, we need to check whether any variable - # created inside the body scope is used before being modified outside the - # scope. This should be done during activity analysis, and in general - # should cover the case where variables may not be initialized. - raise ValueError('cannot convert while loop: no outputs') + # We need to check whether any variable created inside the body scope + # is used before being modified outside the scope. This should be done + # during activity analysis, and in general should cover the case where + # variables may not be initialized. + raise ValueError('cannot convert loop: no outputs') + + return loop_state, reserved_symbols + def _state_constructs(self, loop_state, reserved_symbols): + loop_state = list(loop_state) state_ssf = [ - self.ctx.namer.new_symbol(s.ssf(), all_referenced) for s in state + self.ctx.namer.new_symbol(s.ssf(), reserved_symbols) for s in loop_state ] ssf_map = { name: ssf - for name, ssf in zip(state, state_ssf) + for name, ssf in zip(loop_state, state_ssf) if str(name) != ssf } - if len(state) == 1: - state = state[0] + if len(loop_state) == 1: + loop_state = loop_state[0] state_ssf = state_ssf[0] - state_ast_tuple = state + state_ast_tuple = loop_state else: - state_ast_tuple = gast.Tuple([n.ast() for n in state], None) + state_ast_tuple = gast.Tuple([n.ast() for n in loop_state], None) + + return loop_state, state_ssf, state_ast_tuple, ssf_map + + def visit_While(self, node): + self.generic_visit(node) + loop_state, reserved_symbols = self._get_loop_state(node) + + # Note: one might expect we can dispatch based on the loop condition. + # But because that is dependent on the state, it cannot be evaluated ahead + # of time - doing that would risk duplicating any effects the condition has. 
+ # Furthermore, we cannot evaluate slices and attributes, because they might + # trigger __getitem__ or __getattribute__. + # + # A case where this fails includes ops with side effects on a stateful + # resource captured in an object: + # + # while self.v.read() > 0: + # self.v.assign(1) + # + # TODO(mdan): Handle the case above. + cond_scope = anno.getanno(node, annos.NodeAnno.COND_SCOPE) + cond_closure = set() + for s in cond_scope.used: + cond_closure.update(s.support_set) + cond_closure -= loop_state + + loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs( + loop_state, reserved_symbols) node_body = ast_util.rename_symbols(node.body, ssf_map) test = ast_util.rename_symbols(node.test, ssf_map) - # TODO(b/113118541) investigate the need-for and correctness-of extra_deps template = """ def test_name(state_ssf): return test @@ -262,12 +296,12 @@ class ControlFlowTransformer(converter.Base): """ node = templates.replace( template, - state=state, + state=loop_state, state_ssf=state_ssf, state_ast_tuple=state_ast_tuple, - test_name=self.ctx.namer.new_symbol('loop_test', body_scope.referenced), + test_name=self.ctx.namer.new_symbol('loop_test', reserved_symbols), test=test, - body_name=self.ctx.namer.new_symbol('loop_body', body_scope.referenced), + body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols), body=node_body, extra_deps=tuple(s.ast() for s in cond_closure), ) @@ -277,30 +311,9 @@ class ControlFlowTransformer(converter.Base): def visit_For(self, node): self.generic_visit(node) - self._validate_no_live_vars_created(node) - - body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) - body_closure = body_scope.modified - body_scope.created - all_referenced = body_scope.referenced - - state = list(body_closure) - - state_ssf = [ - self.ctx.namer.new_symbol(s.ssf(), all_referenced) for s in state - ] - ssf_map = { - name: ssf - for name, ssf in zip(state, state_ssf) - if str(name) != ssf - } - - if len(state) == 1: - state = 
state[0] - state_ssf = state_ssf[0] - state_ast_tuple = state - else: - state_ast_tuple = gast.Tuple([n.ast() for n in state], None) - + loop_state, reserved_symbols = self._get_loop_state(node) + loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs( + loop_state, reserved_symbols) node_body = ast_util.rename_symbols(node.body, ssf_map) if anno.hasanno(node, 'extra_test'): extra_test = anno.getanno(node, 'extra_test') @@ -321,14 +334,15 @@ class ControlFlowTransformer(converter.Base): """ node = templates.replace( template, - state=state, + state=loop_state, state_ssf=state_ssf, state_ast_tuple=state_ast_tuple, iter_=node.iter, iterate=node.target, - extra_test_name=self.ctx.namer.new_symbol('extra_test', all_referenced), + extra_test_name=self.ctx.namer.new_symbol('extra_test', + reserved_symbols), extra_test_expr=extra_test, - body_name=self.ctx.namer.new_symbol('loop_body', all_referenced), + body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols), body=node_body) return node diff --git a/tensorflow/python/autograph/converters/control_flow_test.py b/tensorflow/python/autograph/converters/control_flow_test.py index cfa0ea920c..03fdfc804e 100644 --- a/tensorflow/python/autograph/converters/control_flow_test.py +++ b/tensorflow/python/autograph/converters/control_flow_test.py @@ -83,7 +83,7 @@ class ControlFlowTest(converter_testing.TestCase): return s node, ctx = self.prepare(bad_while_loop, {}) - with self.assertRaises(transformer.AutographParseError): + with self.assertRaises(NameError): control_flow.transform(node, ctx) def test_if_basic(self): @@ -232,7 +232,7 @@ class ControlFlowTest(converter_testing.TestCase): return s node, ctx = self.prepare(bad_for_loop, {}) - with self.assertRaises(transformer.AutographParseError): + with self.assertRaises(NameError): control_flow.transform(node, ctx) def test_for_tuple_unpacking(self): diff --git a/tensorflow/python/autograph/pyct/qual_names.py 
b/tensorflow/python/autograph/pyct/qual_names.py index 334cbd7d38..6ad6199acf 100644 --- a/tensorflow/python/autograph/pyct/qual_names.py +++ b/tensorflow/python/autograph/pyct/qual_names.py @@ -99,6 +99,9 @@ class QN(object): def is_symbol(self): return isinstance(self.qn[0], str) + def is_simple(self): + return len(self.qn) <= 1 + def is_composite(self): return len(self.qn) > 1 -- GitLab From 5c6ea51834ee410586233d67d43bdb4f1729261f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 14:07:03 -0700 Subject: [PATCH 145/411] Internal Change PiperOrigin-RevId: 216419037 --- tensorflow/contrib/lite/build_def.bzl | 2 ++ tensorflow/contrib/lite/testing/BUILD | 1 + 2 files changed, 3 insertions(+) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index b3607a761c..05efee18e7 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -345,6 +345,7 @@ def generated_test_models_all(): tags = [] if test in failing_tests: tags.append("notap") + tags.append("manual") if conversion_mode: test += "_%s" % conversion_mode options.append((conversion_mode, test, tags)) @@ -450,6 +451,7 @@ def gen_full_model_test(conversion_modes, models, data, test_suite_tag): "no_oss", "no_windows", "notap", + "manual", ] + [test_suite_tag], deps = [ "//tensorflow/contrib/lite/testing:model_coverage_lib", diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 45baad782a..2edd420fea 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -380,6 +380,7 @@ py_test( srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib_test.py"], srcs_version = "PY2AND3", tags = [ + "manual", "no_oss", "no_pip", "no_windows", -- GitLab From 4fa59ef694c19dc63d574b2d6a349cd753d9cdbd Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 9 Oct 2018 14:11:06 -0700 Subject: [PATCH 146/411] [tf.data] Lift 
parameterized test parameters into lambdas if they create TF ops. The existing code triggers parts of the TensorFlow runtime that may not have been fully initialized at the time the parameters are evaluated. Lifting into a lambda and invoking the lambda inside the test method will achieve the proper order. PiperOrigin-RevId: 216419757 --- tensorflow/python/data/util/structure_test.py | 61 ++++++++++--------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/tensorflow/python/data/util/structure_test.py b/tensorflow/python/data/util/structure_test.py index 2982763181..630a0c912b 100644 --- a/tensorflow/python/data/util/structure_test.py +++ b/tensorflow/python/data/util/structure_test.py @@ -34,52 +34,56 @@ from tensorflow.python.platform import test class StructureTest(test.TestCase, parameterized.TestCase): - # pylint disable=protected-access + # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they + # will be executed before the (eager- or graph-mode) test environment has been + # set up. 
+ # pylint: disable=g-long-lambda,protected-access @parameterized.parameters( - (constant_op.constant(37.0), structure.TensorStructure, [dtypes.float32], - [[]]), (sparse_tensor.SparseTensor( - indices=[[3, 4]], values=[-1], dense_shape=[4, 5]), - structure.SparseTensorStructure, [dtypes.variant], [[3]]), - ((constant_op.constant(37.0), constant_op.constant([1, 2, 3])), - structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]), ({ - "a": constant_op.constant(37.0), - "b": constant_op.constant([1, 2, 3]) - }, structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]), - ({ - "a": - constant_op.constant(37.0), + (lambda: constant_op.constant(37.0), structure.TensorStructure, + [dtypes.float32], [[]]), + (lambda: sparse_tensor.SparseTensor( + indices=[[3, 4]], values=[-1], dense_shape=[4, 5]), + structure.SparseTensorStructure, [dtypes.variant], [[3]]), + (lambda: (constant_op.constant(37.0), constant_op.constant([1, 2, 3])), + structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]), + (lambda: { + "a": constant_op.constant(37.0), + "b": constant_op.constant([1, 2, 3]) + }, structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]), + (lambda: { + "a": constant_op.constant(37.0), "b": (sparse_tensor.SparseTensor( indices=[[0, 0]], values=[1], dense_shape=[1, 1]), sparse_tensor.SparseTensor( indices=[[3, 4]], values=[-1], dense_shape=[4, 5])) }, structure.NestedStructure, [dtypes.float32, dtypes.variant, dtypes.variant], [[], [3], [3]])) - def testFlatStructure(self, value, expected_structure, expected_types, + def testFlatStructure(self, value_fn, expected_structure, expected_types, expected_shapes): + value = value_fn() s = structure.Structure.from_value(value) self.assertIsInstance(s, expected_structure) self.assertEqual(expected_types, s._flat_types) self.assertEqual(expected_shapes, s._flat_shapes) @parameterized.parameters( - (constant_op.constant(37.0), [ + (lambda: constant_op.constant(37.0), lambda: [ 
constant_op.constant(38.0), array_ops.placeholder(dtypes.float32), variables.Variable(100.0), 42.0, np.array(42.0, dtype=np.float32) - ], [constant_op.constant([1.0, 2.0]), - constant_op.constant(37)]), - (sparse_tensor.SparseTensor( + ], lambda: [constant_op.constant([1.0, 2.0]), constant_op.constant(37)]), + (lambda: sparse_tensor.SparseTensor( indices=[[3, 4]], values=[-1], dense_shape=[4, 5]), - [ + lambda: [ sparse_tensor.SparseTensor( indices=[[1, 1], [3, 4]], values=[10, -1], dense_shape=[4, 5]), sparse_tensor.SparseTensorValue( indices=[[1, 1], [3, 4]], values=[10, -1], dense_shape=[4, 5]), array_ops.sparse_placeholder(dtype=dtypes.int32), array_ops.sparse_placeholder(dtype=dtypes.int32, shape=[None, None]) - ], [ + ], lambda: [ constant_op.constant(37, shape=[4, 5]), sparse_tensor.SparseTensor( indices=[[3, 4]], values=[-1], dense_shape=[5, 6]), @@ -88,13 +92,13 @@ class StructureTest(test.TestCase, parameterized.TestCase): sparse_tensor.SparseTensor( indices=[[3, 4]], values=[-1.0], dense_shape=[4, 5]) ]), - ({ + (lambda: { "a": constant_op.constant(37.0), "b": constant_op.constant([1, 2, 3]) - }, [{ + }, lambda: [{ "a": constant_op.constant(15.0), "b": constant_op.constant([4, 5, 6]) - }], [{ + }], lambda: [{ "a": constant_op.constant(15.0), "b": constant_op.constant([4, 5, 6, 7]) }, { @@ -108,8 +112,11 @@ class StructureTest(test.TestCase, parameterized.TestCase): indices=[[0], [1], [2]], values=[4, 5, 6], dense_shape=[3]) }, (constant_op.constant(15.0), constant_op.constant([4, 5, 6]))]), ) - def testIsCompatibleWithStructure(self, original_value, compatible_values, - incompatible_values): + def testIsCompatibleWithStructure( + self, original_value_fn, compatible_values_fn, incompatible_values_fn): + original_value = original_value_fn() + compatible_values = compatible_values_fn() + incompatible_values = incompatible_values_fn() s = structure.Structure.from_value(original_value) for compatible_value in compatible_values: self.assertTrue( @@ -120,10 
+127,6 @@ class StructureTest(test.TestCase, parameterized.TestCase): s.is_compatible_with( structure.Structure.from_value(incompatible_value))) - # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they - # will be executed before the (eager- or graph-mode) test environment has been - # set up. - # pylint: disable=g-long-lambda @parameterized.parameters( (lambda: constant_op.constant(37.0),), (lambda: sparse_tensor.SparseTensor( -- GitLab From b145f46b735fe1e383be6629cafaa5269b07b7fb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 14:12:25 -0700 Subject: [PATCH 147/411] Add support for time-major input in the bidirectional RNN Op. PiperOrigin-RevId: 216419983 --- .../kernels/bidirectional_sequence_rnn.cc | 251 ++++++++++++------ .../bidirectional_sequence_rnn_test.cc | 94 +++++-- 2 files changed, 247 insertions(+), 98 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc index c22a457a71..f544dd5ffa 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc @@ -114,8 +114,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); TF_LITE_ENSURE_EQ(context, input->dims->size, 3); - const int batch_size = input->dims->data[0]; - const int max_time = input->dims->data[1]; + const bool time_major = params->time_major; + const int batch_size = + (time_major) ? input->dims->data[1] : input->dims->data[0]; + const int max_time = + (time_major) ? 
input->dims->data[0] : input->dims->data[1]; const int fw_num_units = fw_input_weights->dims->data[0]; const int bw_num_units = bw_input_weights->dims->data[0]; TF_LITE_ASSERT_EQ(input->dims->data[2], fw_input_weights->dims->data[1]); @@ -237,8 +240,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Resize outputs. TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor); TfLiteIntArray* fw_output_size_array = TfLiteIntArrayCreate(3); - fw_output_size_array->data[0] = batch_size; - fw_output_size_array->data[1] = max_time; + fw_output_size_array->data[0] = (time_major) ? max_time : batch_size; + fw_output_size_array->data[1] = (time_major) ? batch_size : max_time; fw_output_size_array->data[2] = params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units; TF_LITE_ENSURE_OK( @@ -266,8 +269,11 @@ TfLiteStatus EvalFloat( const TfLiteBidirectionalSequenceRNNParams* params, TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output, TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) { - const int batch_size = input->dims->data[0]; - const int max_time = input->dims->data[1]; + const bool time_major = params->time_major; + const int batch_size = + (time_major) ? input->dims->data[1] : input->dims->data[0]; + const int max_time = + (time_major) ? input->dims->data[0] : input->dims->data[1]; const int input_size = input->dims->data[2]; const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0; @@ -292,48 +298,91 @@ TfLiteStatus EvalFloat( params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units; const int bw_output_step = params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units; - for (int b = 0; b < batch_size; b++) { + if (time_major) { + // TODO(mirkov): add merge_outputs support for time_major inputs. + TF_LITE_ASSERT_EQ(params->merge_outputs, false); + // Forward cell. 
- float* fw_hidden_state_ptr_batch = - fw_hidden_state->data.f + b * fw_num_units; - float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time; + float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f; for (int s = 0; s < max_time; s++) { const float* input_ptr_batch = - input->data.f + b * input_size * max_time + s * input_size; + input->data.f + s * input_size * batch_size; const float* aux_input_ptr_batch = (aux_input != nullptr) - ? aux_input->data.f + b * input_size * max_time + s * input_size + ? aux_input->data.f + s * input_size * batch_size : nullptr; - float* output_ptr_batch = fw_output_offset + s * fw_output_step; + float* output_ptr_batch = + fw_output->data.f + s * fw_num_units * batch_size; kernel_utils::RnnBatchStep( input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch, fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr, - input_size, aux_input_size, fw_num_units, /*batch_size=*/1, + input_size, aux_input_size, fw_num_units, batch_size, params->activation, fw_hidden_state_ptr_batch, output_ptr_batch); } // Backward cell. - float* bw_hidden_state_ptr_batch = - bw_hidden_state->data.f + b * bw_num_units; - float* bw_output_offset = - params->merge_outputs - ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units - : bw_output->data.f + b * bw_output_step * max_time; + float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f; for (int s = max_time - 1; s >= 0; s--) { const float* input_ptr_batch = - input->data.f + b * input_size * max_time + s * input_size; + input->data.f + s * input_size * batch_size; const float* aux_input_ptr_batch = (aux_input != nullptr) - ? aux_input->data.f + b * input_size * max_time + s * input_size + ? 
aux_input->data.f + s * input_size * batch_size : nullptr; - float* output_ptr_batch = bw_output_offset + s * bw_output_step; + float* output_ptr_batch = + bw_output->data.f + s * bw_num_units * batch_size; kernel_utils::RnnBatchStep( input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch, bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr, - input_size, aux_input_size, bw_num_units, /*batch_size=*/1, + input_size, aux_input_size, bw_num_units, batch_size, params->activation, bw_hidden_state_ptr_batch, output_ptr_batch); } + } else { + for (int b = 0; b < batch_size; b++) { + // Forward cell. + float* fw_hidden_state_ptr_batch = + fw_hidden_state->data.f + b * fw_num_units; + float* fw_output_offset = + fw_output->data.f + b * fw_output_step * max_time; + for (int s = 0; s < max_time; s++) { + const float* input_ptr_batch = + input->data.f + b * input_size * max_time + s * input_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? aux_input->data.f + b * input_size * max_time + s * input_size + : nullptr; + float* output_ptr_batch = fw_output_offset + s * fw_output_step; + + kernel_utils::RnnBatchStep( + input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch, + fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr, + input_size, aux_input_size, fw_num_units, /*batch_size=*/1, + params->activation, fw_hidden_state_ptr_batch, output_ptr_batch); + } + // Backward cell. + float* bw_hidden_state_ptr_batch = + bw_hidden_state->data.f + b * bw_num_units; + float* bw_output_offset = + params->merge_outputs + ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units + : bw_output->data.f + b * bw_output_step * max_time; + for (int s = max_time - 1; s >= 0; s--) { + const float* input_ptr_batch = + input->data.f + b * input_size * max_time + s * input_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? 
aux_input->data.f + b * input_size * max_time + s * input_size + : nullptr; + float* output_ptr_batch = bw_output_offset + s * bw_output_step; + + kernel_utils::RnnBatchStep( + input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch, + bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr, + input_size, aux_input_size, bw_num_units, /*batch_size=*/1, + params->activation, bw_hidden_state_ptr_batch, output_ptr_batch); + } + } } return kTfLiteOk; } @@ -351,8 +400,11 @@ TfLiteStatus EvalHybrid( TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output, TfLiteTensor* bw_hidden_state_quantized, TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) { - const int batch_size = input->dims->data[0]; - const int max_time = input->dims->data[1]; + const bool time_major = params->time_major; + const int batch_size = + (time_major) ? input->dims->data[1] : input->dims->data[0]; + const int max_time = + (time_major) ? input->dims->data[0] : input->dims->data[1]; const int input_size = input->dims->data[2]; const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0; @@ -403,55 +455,106 @@ TfLiteStatus EvalHybrid( params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units; const int bw_output_step = params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units; - for (int b = 0; b < batch_size; b++) { - // Forward cell. - float* fw_hidden_state_ptr_batch = - fw_hidden_state->data.f + b * fw_num_units; - float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time; - for (int s = 0; s < max_time; s++) { - const float* input_ptr_batch = - input->data.f + b * input_size * max_time + s * input_size; - const float* aux_input_ptr_batch = - (aux_input != nullptr) - ? 
aux_input->data.f + b * input_size * max_time + s * input_size - : nullptr; - float* output_ptr_batch = fw_output_offset + s * fw_output_step; - - kernel_utils::RnnBatchStep( - input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale, - aux_input_ptr_batch, aux_fw_input_weights_ptr, - aux_fw_input_weights_scale, fw_recurrent_weights_ptr, - fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size, - fw_num_units, /*batch_size=*/1, params->activation, - quantized_input_ptr, aux_quantized_input_ptr, - fw_quantized_hidden_state_ptr, scaling_factors_ptr, - fw_hidden_state_ptr_batch, output_ptr_batch); + if (time_major) { + for (int t = 0; t < max_time; t++) { + // TODO(mirkov): add merge_outputs support for time_major inputs. + TF_LITE_ASSERT_EQ(params->merge_outputs, false); + + // Forward cell. + float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f; + for (int s = 0; s < max_time; s++) { + const float* input_ptr_batch = + input->data.f + s * input_size * batch_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? aux_input->data.f + s * input_size * batch_size + : nullptr; + float* output_ptr_batch = + fw_output->data.f + s * fw_num_units * batch_size; + + kernel_utils::RnnBatchStep( + input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale, + aux_input_ptr_batch, aux_fw_input_weights_ptr, + aux_fw_input_weights_scale, fw_recurrent_weights_ptr, + fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size, + fw_num_units, batch_size, params->activation, quantized_input_ptr, + aux_quantized_input_ptr, fw_quantized_hidden_state_ptr, + scaling_factors_ptr, fw_hidden_state_ptr_batch, output_ptr_batch); + } + // Backward cell. + float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f; + for (int s = max_time - 1; s >= 0; s--) { + const float* input_ptr_batch = + input->data.f + s * input_size * batch_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? 
aux_input->data.f + s * input_size * batch_size + : nullptr; + float* output_ptr_batch = + bw_output->data.f + s * bw_num_units * batch_size; + + kernel_utils::RnnBatchStep( + input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale, + aux_input_ptr_batch, aux_bw_input_weights_ptr, + aux_bw_input_weights_scale, bw_recurrent_weights_ptr, + bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size, + bw_num_units, batch_size, params->activation, quantized_input_ptr, + aux_quantized_input_ptr, bw_quantized_hidden_state_ptr, + scaling_factors_ptr, bw_hidden_state_ptr_batch, output_ptr_batch); + } } - // Backward cell. - float* bw_hidden_state_ptr_batch = - bw_hidden_state->data.f + b * bw_num_units; - float* bw_output_offset = - params->merge_outputs - ? fw_output->data.f + b * bw_output_step * max_time - : bw_output->data.f + b * bw_output_step * max_time; - for (int s = max_time - 1; s >= 0; s--) { - const float* input_ptr_batch = - input->data.f + b * input_size * max_time + s * input_size; - const float* aux_input_ptr_batch = - (aux_input != nullptr) - ? aux_input->data.f + b * input_size * max_time + s * input_size - : nullptr; - float* output_ptr_batch = bw_output_offset + s * bw_output_step; - - kernel_utils::RnnBatchStep( - input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale, - aux_input_ptr_batch, aux_bw_input_weights_ptr, - aux_bw_input_weights_scale, bw_recurrent_weights_ptr, - bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size, - bw_num_units, /*batch_size=*/1, params->activation, - quantized_input_ptr, aux_quantized_input_ptr, - bw_quantized_hidden_state_ptr, scaling_factors_ptr, - bw_hidden_state_ptr_batch, output_ptr_batch); + } else { + for (int b = 0; b < batch_size; b++) { + // Forward cell. 
+ float* fw_hidden_state_ptr_batch = + fw_hidden_state->data.f + b * fw_num_units; + float* fw_output_offset = + fw_output->data.f + b * fw_output_step * max_time; + for (int s = 0; s < max_time; s++) { + const float* input_ptr_batch = + input->data.f + b * input_size * max_time + s * input_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? aux_input->data.f + b * input_size * max_time + s * input_size + : nullptr; + float* output_ptr_batch = fw_output_offset + s * fw_output_step; + + kernel_utils::RnnBatchStep( + input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale, + aux_input_ptr_batch, aux_fw_input_weights_ptr, + aux_fw_input_weights_scale, fw_recurrent_weights_ptr, + fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size, + fw_num_units, /*batch_size=*/1, params->activation, + quantized_input_ptr, aux_quantized_input_ptr, + fw_quantized_hidden_state_ptr, scaling_factors_ptr, + fw_hidden_state_ptr_batch, output_ptr_batch); + } + // Backward cell. + float* bw_hidden_state_ptr_batch = + bw_hidden_state->data.f + b * bw_num_units; + float* bw_output_offset = + params->merge_outputs + ? fw_output->data.f + b * bw_output_step * max_time + : bw_output->data.f + b * bw_output_step * max_time; + for (int s = max_time - 1; s >= 0; s--) { + const float* input_ptr_batch = + input->data.f + b * input_size * max_time + s * input_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? 
aux_input->data.f + b * input_size * max_time + s * input_size + : nullptr; + float* output_ptr_batch = bw_output_offset + s * bw_output_step; + + kernel_utils::RnnBatchStep( + input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale, + aux_input_ptr_batch, aux_bw_input_weights_ptr, + aux_bw_input_weights_scale, bw_recurrent_weights_ptr, + bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size, + bw_num_units, /*batch_size=*/1, params->activation, + quantized_input_ptr, aux_quantized_input_ptr, + bw_quantized_hidden_state_ptr, scaling_factors_ptr, + bw_hidden_state_ptr_batch, output_ptr_batch); + } } } return kTfLiteOk; diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc index f555c472f5..6c179ca05d 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc @@ -654,7 +654,8 @@ const std::initializer_list recurrent_weights = { class BidirectionalRNNOpModel : public SingleOpModel { public: BidirectionalRNNOpModel(int batches, int sequence_len, int fw_units, - int bw_units, int input_size, bool merge_outputs) + int bw_units, int input_size, bool time_major, + bool merge_outputs) : batches_(batches), sequence_len_(sequence_len), fw_units_(fw_units), @@ -679,25 +680,29 @@ class BidirectionalRNNOpModel : public SingleOpModel { bw_output_ = AddOutput(TensorType_FLOAT32); } - SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, - BuiltinOptions_BidirectionalSequenceRNNOptions, - CreateBidirectionalSequenceRNNOptions( - builder_, /*time_major=*/false, - ActivationFunctionType_RELU, merge_outputs) - .Union()); + SetBuiltinOp( + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOptions_BidirectionalSequenceRNNOptions, + CreateBidirectionalSequenceRNNOptions( + builder_, time_major, ActivationFunctionType_RELU, merge_outputs) + .Union()); + const auto input_shape = 
+ (time_major) ? std::vector({sequence_len_, batches_, input_size_}) + : std::vector({batches_, sequence_len_, input_size_}); + BuildInterpreter({ - {batches_, sequence_len_, input_size_}, // input - {fw_units_, input_size_}, // fw_weights - {fw_units_, fw_units_}, // fw_recurrent_weights - {fw_units_}, // fw_bias - {batches_, fw_units_}, // fw_hidden_state - {bw_units_, input_size_}, // bw_weights - {bw_units_, bw_units_}, // bw_recurrent_weights - {bw_units_}, // bw_bias - {batches_, bw_units_}, // bw_hidden_state - {batches_, sequence_len_, 0}, // aux_input - {fw_units_, 0}, // aux_fw_weights - {bw_units_, 0}, // aux_bw_weights + input_shape, // input + {fw_units_, input_size_}, // fw_weights + {fw_units_, fw_units_}, // fw_recurrent_weights + {fw_units_}, // fw_bias + {batches_, fw_units_}, // fw_hidden_state + {bw_units_, input_size_}, // bw_weights + {bw_units_, bw_units_}, // bw_recurrent_weights + {bw_units_}, // bw_bias + {batches_, bw_units_}, // bw_hidden_state + {batches_, sequence_len_, 0}, // aux_input + {fw_units_, 0}, // aux_fw_weights + {bw_units_, 0}, // aux_bw_weights }); } @@ -770,7 +775,8 @@ class BidirectionalRNNOpModel : public SingleOpModel { TEST(BidirectionalRNNOpTest, BlackBoxTest) { BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, /*fw_units=*/16, /*bw_units=*/16, - /*input_size=*/8, /*merge_outputs=*/false); + /*input_size=*/8, /*time_major=*/false, + /*merge_outputs=*/false); rnn.SetFwWeights(weights); rnn.SetBwWeights(weights); rnn.SetFwBias(biases); @@ -803,11 +809,49 @@ TEST(BidirectionalRNNOpTest, BlackBoxTest) { EXPECT_THAT(rnn.GetBwOutput(), ElementsAreArray(ArrayFloatNear(bw_expected))); } -// Same as the previous test, yet with merged outputs. +// Same as BlackBox test, but input is reshuffled to time_major format. 
+TEST(BidirectionalRNNOpTest, BlackBoxTestTimeMajor) { + BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, + /*fw_units=*/16, /*bw_units=*/16, + /*input_size=*/8, /*time_major=*/true, + /*merge_outputs=*/false); + rnn.SetFwWeights(weights); + rnn.SetBwWeights(weights); + rnn.SetFwBias(biases); + rnn.SetBwBias(biases); + rnn.SetFwRecurrentWeights(recurrent_weights); + rnn.SetBwRecurrentWeights(recurrent_weights); + + // const int input_sequence_size = rnn.input_size() * rnn.sequence_len(); + // Insert the inputs in time_major format. The batch_major format is: + // [b0t0, b0t1, ..., b0t15, b1t0, b1t1, ..., b1t15]. This is reshuffled as: + // [b0t0, b1t0, b0t1, b1t1, ..., b0t15, b1t15]. + for (int i = 0; i < rnn.sequence_len(); i++) { + float* batch_start = rnn_input + i * rnn.input_size(); + float* batch_end = batch_start + rnn.input_size(); + // The two batches are identical. + rnn.SetInput(2 * i * rnn.input_size(), batch_start, batch_end); + rnn.SetInput((2 * i + 1) * rnn.input_size(), batch_start, batch_end); + } + + rnn.Invoke(); + + std::vector fw_expected; + for (int i = 0; i < rnn.sequence_len(); i++) { + float* golden_fw_start = rnn_golden_fw_output + i * rnn.num_fw_units(); + float* golden_fw_end = golden_fw_start + rnn.num_fw_units(); + fw_expected.insert(fw_expected.end(), golden_fw_start, golden_fw_end); + fw_expected.insert(fw_expected.end(), golden_fw_start, golden_fw_end); + } + EXPECT_THAT(rnn.GetFwOutput(), ElementsAreArray(ArrayFloatNear(fw_expected))); +} + +// Same as BlackBox test, yet with merged outputs. 
TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) { BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, /*fw_units=*/16, /*bw_units=*/16, - /*input_size=*/8, /*merge_outputs=*/true); + /*input_size=*/8, /*time_major=*/false, + /*merge_outputs=*/true); rnn.SetFwWeights(weights); rnn.SetBwWeights(weights); rnn.SetFwBias(biases); @@ -845,7 +889,8 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) { TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) { BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, /*fw_units=*/16, /*bw_units=*/16, - /*input_size=*/8, /*merge_outputs=*/false); + /*input_size=*/8, /*time_major=*/false, + /*merge_outputs=*/false); rnn.SetFwWeights(weights); rnn.SetBwWeights(weights); rnn.SetFwBias(biases); @@ -891,7 +936,8 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) { TEST(BidirectionalRNNOpTest, EndToEndTest) { BidirectionalRNNOpModel rnn(/*batches=*/1, /*sequence_len=*/4, /*fw_units=*/16, /*bw_units=*/16, - /*input_size=*/8, /*merge_outputs=*/false); + /*input_size=*/8, /*time_major=*/false, + /*merge_outputs=*/false); const int output_size = 4; float dnn_weights[] = { -0.5782342, -0.052212059, 0.73036242, -0.81216097, -0.80088139, -- GitLab From fa1542234857acf56af6e7f0dbe8d2084a18fa00 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 9 Oct 2018 14:19:07 -0700 Subject: [PATCH 148/411] [XLA:GPU] Pattern match atomic "apply" into an atomic store Otherwise we'd emit a CAS loop. 
PiperOrigin-RevId: 216421161 --- .../compiler/xla/service/gpu/ir_emitter.cc | 15 +++++ .../compiler/xla/service/gpu/tests/BUILD | 12 ++++ .../xla/service/gpu/tests/gpu_atomic_test.cc | 58 +++++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index b7c37bcf3c..47102347cb 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -179,6 +179,21 @@ bool IrEmitter::MaybeEmitDirectAtomicOperation( bool is_atomic_integral = element_type == S32 || element_type == U32 || element_type == S64 || element_type == U64; llvm::Value* source = Load(source_address, "source"); + + // kCopy of RHS -> atomic store. + if (root_opcode == HloOpcode::kCopy && + (element_type == F32 || is_atomic_integral) && + computation.root_instruction()->operand(0)->opcode() == + HloOpcode::kParameter && + computation.root_instruction()->operand(0)->parameter_number() == 1) { + llvm::StoreInst* store = Store(source, output_address); + store->setAtomic(llvm::AtomicOrdering::Unordered); + // Derive a minimum alignment from the type. The optimizer can increase it + // later. + store->setAlignment(ShapeUtil::ByteSizeOfPrimitiveType(element_type)); + return true; + } + if (root_opcode == HloOpcode::kAdd) { // NVPTX supports atomicAdd on F32 and integer types. 
if (element_type == F32) { diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index a725533567..1f0436278c 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -223,3 +223,15 @@ tf_cc_test( "@com_google_absl//absl/strings", ], ) + +tf_cc_test( + name = "gpu_atomic_test", + srcs = ["gpu_atomic_test.cc"], + tags = tf_cuda_tests_tags(), + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc new file mode 100644 index 0000000000..6b18c4c637 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include + +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace gpu { +namespace { + +class GpuAtomicTest : public GpuCodegenTest {}; + +TEST_F(GpuAtomicTest, TestStore) { + const char* hlo_string = R"( + HloModule TensorFlowScatterV1 + + update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) + } + + ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + updates = s32[2,3] parameter(2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + } +)"; + + CompileAndVerifyIr(hlo_string, R"( +CHECK: store atomic{{.*}}unordered, align 4 +)"); +} + +} // namespace +} // namespace gpu +} // namespace xla -- GitLab From 35caff957424a60bd7d7e4e92a1ec87f617781c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 14:25:27 -0700 Subject: [PATCH 149/411] Export feature importance for oblivious tree nodes. 
PiperOrigin-RevId: 216422334 --- .../estimator_batch/custom_export_strategy.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py index 48f12a64f9..a3df272e69 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py @@ -196,6 +196,10 @@ def convert_to_universal_format(dtec, sorted_feature_names, matching_id = categorical_test.value.add() matching_id.int64_value = split.feature_id node.custom_left_child_test.Pack(categorical_test) + elif (node_type == "oblivious_dense_float_binary_split" or + node_type == "oblivious_categorical_id_binary_split"): + raise ValueError("Universal tree format doesn't support oblivious " + "trees") else: raise ValueError("Unexpected node type %s" % node_type) node.left_child_id.value = split.left_id @@ -229,6 +233,13 @@ def _get_feature_importances(dtec, feature_names, num_dense_floats, split = tree_node.categorical_id_binary_split split_column = feature_names[split.feature_column + num_dense_floats + num_sparse_float] + elif node_type == "oblivious_dense_float_binary_split": + split = tree_node.oblivious_dense_float_binary_split + split_column = feature_names[split.feature_column] + elif node_type == "oblivious_categorical_id_binary_split": + split = tree_node.oblivious_categorical_id_binary_split + split_column = feature_names[split.feature_column + num_dense_floats + + num_sparse_float] elif node_type == "categorical_id_set_membership_binary_split": split = tree_node.categorical_id_set_membership_binary_split split_column = feature_names[split.feature_column + num_dense_floats + -- GitLab From 950cf87104bfee28e2165fe368f66337b8a1336d Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Tue, 9 Oct 2018 14:36:33 -0700 Subject: [PATCH 150/411] [tf.data vectorization] Add 
vectorizer for `Add` op PiperOrigin-RevId: 216424512 --- tensorflow/core/graph/graph.cc | 2 +- .../optimizers/data/vectorization/BUILD | 34 ++-- .../data/vectorization/add_vectorizer.cc | 150 ++++++++++++++++++ .../optimizers/data/vectorization_utils.cc | 21 +-- .../data/vectorization_utils_test.cc | 103 ++++++++++-- .../optimization/map_vectorization_test.py | 1 + 6 files changed, 280 insertions(+), 31 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 6f068546d2..a17491d4f7 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -34,7 +34,7 @@ namespace tensorflow { const int Graph::kControlSlot = -1; -class NodeProperties { +struct NodeProperties { public: NodeProperties(const OpDef* op_def, const NodeDef& node_def, const DataTypeSlice inputs, const DataTypeSlice outputs) diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD index 985d6c6c3a..09018d0124 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD +++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD @@ -9,7 +9,11 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all") VECTORIZER_DEPS = [ ":vectorizer_registry", + "//tensorflow/cc:ops", "//tensorflow/core/grappler/optimizers/data:graph_utils", + "//tensorflow/core:core_cpu", + "//tensorflow/cc:scope_internal", + "//tensorflow/cc:cc_ops", ] + tf_protos_all() cc_library( @@ -42,6 +46,24 @@ cc_library( ], ) +tf_cc_test( + name = "vectorizer_registry_test", + srcs = ["vectorizer_registry_test.cc"], + deps = [ + ":vectorizer_registry", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ] + tf_protos_all(), +) + +cc_library( + name = "add_vectorizer", + srcs = ["add_vectorizer.cc"], + deps = VECTORIZER_DEPS, + 
alwayslink = 1, +) + cc_library( name = "cast_vectorizer", srcs = ["cast_vectorizer.cc"], @@ -61,20 +83,10 @@ cc_library( hdrs = ["vectorizer_registry.h"], visibility = ["//visibility:public"], deps = [ + ":add_vectorizer", ":cast_vectorizer", ":unpack_vectorizer", ":vectorizer", ":vectorizer_registry", ], ) - -tf_cc_test( - name = "vectorizer_registry_test", - srcs = ["vectorizer_registry_test.cc"], - deps = [ - ":vectorizer_registry", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ] + tf_protos_all(), -) diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc new file mode 100644 index 0000000000..d90a51b01a --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc @@ -0,0 +1,150 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope_internal.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/math_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h" + +namespace tensorflow { +namespace grappler { + +namespace { + +const char* const kExpandDimsPrefix = "vectorized/expanddims/"; + +// Reshapes stacked inputs for broadcast. Stacked inputs have an extra leading +// dimension, which may cause automatic broadcasting rules to expand the +// input dimensions wrongly when the unstacked shapes have different ranks. +// To avoid that, we reshape stacked inputs to the maximum rank they need +// to be broadcasted to. +// +// For example, suppose we have inputs A and B, where A is a stacked tensor with +// shape [n, 5] (where n is the stack size) and B is an unstacked tensor with +// shape [12, 7, 5]. If we added them directly, tensorflow broadcasting rules +// would expand the dimensions of A to [1, n, 5], then (incorrectly) check that +// the dimensions n and 7 are compatible, and if so, create an output of shape +// [12, 7, 5]. However, correct addition of these inputs would create an output +// with shape [n, 12, 7, 5]: we need to manually expand the dimensions of A +// *after* the leading dimension, i.e. expand A to the shape [n, 1, 1, 5] before +// broadcasting. 
+Status ExpandDimsForBroadcast(std::vector* inputs, Graph* g) { + Status status; + Scope parent = NewInternalScope(g, &status, nullptr); + Scope s = parent.NewSubScope(kExpandDimsPrefix); + + // TODO(rachelim): We can potentially get rid of all these ops if shapes are + // known statically + + Output const_0 = ops::Const(s, 0); + Output const_1 = ops::Const(s, 1); + + std::vector ranks; + ranks.reserve(inputs->size()); + + // Get the stacked rank of each input + for (const auto& input : *inputs) { + Output rank = ops::Rank(s, Output(input.node, input.output_index)); + + if (!input.stacked) { + // If the input is unstacked, add 1 + rank = ops::Add(s, rank, const_1); + } + + ranks.push_back(rank); + } + + // Pack the ranks into one tensor to get the max + Output packed_ranks = ops::Stack(s, ranks); + + Output max_rank = + ops::Max(s, packed_ranks, const_0, ops::Max::Attrs().KeepDims(true)); + + std::vector expanded_inputs; + expanded_inputs.reserve(inputs->size()); + + // For all inputs that are stacked, expand dimensions after dim 0. 
+ for (size_t i = 0; i < inputs->size(); ++i) { + if (!inputs->at(i).stacked) { + expanded_inputs.push_back(inputs->at(i)); + continue; + } + + Output input(inputs->at(i).node, inputs->at(i).output_index); + + // Number of dimensions to expand + Output rank_diff = ops::Sub(s, max_rank, ranks[i]); + + // [1] * rank_diff + Output ones = ops::Tile(s, ops::Const(s, {1}), rank_diff); + + Output const_vec_1 = ops::Const(s, {1}); + + Output shape = ops::Shape(s, input); + + // shape[:1] + Output concat_pre = + ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1, + ops::StridedSlice::Attrs().BeginMask(1)); + + // shape[1:] + Output concat_post = + ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1, + ops::StridedSlice::Attrs().EndMask(1)); + + // tf.concat([shape[:1], ones, shape[1:]], 0) + Output new_shape = ops::Concat(s, {concat_pre, ones, concat_post}, const_0); + + Output result = ops::Reshape(s, input, new_shape); + + expanded_inputs.push_back({result.node(), 0, true}); + } + + inputs->swap(expanded_inputs); + return status; +} + +class AddVectorizer : public Vectorizer { + public: + Status Vectorize(const Node& node, Graph* outer_scope, + std::vector&& inputs, + std::vector* outputs) override { + if (node.num_inputs() != 2) { + return errors::Internal("Add op should only have two inputs."); + } + + TF_RETURN_IF_ERROR(ExpandDimsForBroadcast(&inputs, outer_scope)); + + // Add new Add node with the same op and attrs as the original node + Node* new_add_node; + TF_RETURN_IF_ERROR(NodeBuilder("Add", "Add") + .Input(inputs[0].node, inputs[0].output_index) + .Input(inputs[1].node, inputs[1].output_index) + .Finalize(outer_scope, &new_add_node)); + + // Add output mappings + outputs->push_back({new_add_node, 0, true}); + return Status::OK(); + } +}; + +REGISTER_VECTORIZER("Add", AddVectorizer); + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc 
b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc index d977ff3198..8b93b1f2b8 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc @@ -64,9 +64,18 @@ void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src, } } +// Update node attrs to keep its properties consistent with the function +void UpdateMapDefunAttrs(FunctionBody* map_defun_fn, Node* map_defun_node) { + map_defun_node->AddAttr("output_types", map_defun_fn->ret_types); + + // TODO(rachelim): Propagate precise shapes if they're known, which may enable + // subsequent optimizations. + map_defun_node->AddAttr("output_shapes", std::vector( + map_defun_fn->ret_types.size())); +} + Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node, const TensorDesc& output) { - // Note that we don't update MapDefun attrs as we go, only when we are done DataType type = output.first->output_type(output.second); int index = map_defun_fn->ret_nodes.size(); @@ -83,13 +92,13 @@ Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node, map_defun_fn->graph->AddEdge(output.first, output.second, ret_node, 0); map_defun_fn->ret_nodes.push_back(ret_node); map_defun_fn->ret_types.push_back(type); + UpdateMapDefunAttrs(map_defun_fn, map_defun_node); return s; } void RemoveMapDefunOutput(int output_position, Graph* outer_scope, FunctionBody* map_defun_fn, Node* map_defun_node) { - // Note that we don't update MapDefun attrs as we go, only when we are done DCHECK_LT(output_position, map_defun_fn->ret_nodes.size()) << "Trying to remove output that doesn't exist. 
Output number: " << output_position; @@ -102,6 +111,7 @@ void RemoveMapDefunOutput(int output_position, Graph* outer_scope, output_position); map_defun_fn->ret_types.erase(map_defun_fn->ret_types.begin() + output_position); + UpdateMapDefunAttrs(map_defun_fn, map_defun_node); // Renumber the nodes and edges that come after for (int i = 0; i < num_later_outputs; ++i) { @@ -342,13 +352,6 @@ void Vectorization::VectorizeHelper() { // need the MapDefun node and can delete it. if (map_defun_fn_->ret_nodes.empty()) { outer_scope_->RemoveNode(map_defun_node_); - } else { - // Update MapDefun node attrs accordingly - DCHECK_EQ(map_defun_fn_->ret_types.size(), map_defun_fn_->ret_nodes.size()); - map_defun_node_->AddAttr( - "output_shapes", - std::vector(map_defun_fn_->ret_types.size())); - map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types); } } diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc index a6020e36bb..be498d150b 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc @@ -145,7 +145,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) { FunctionDef* vectorized; Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized); LOG(ERROR) << s; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); EXPECT_EQ(GetRetval(*vectorized, 0), "ret0"); @@ -237,7 +237,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); auto map_defun_node = 
vectorized->node_def( function_utils::FindFunctionNodeWithOp("MapDefun", *vectorized)); @@ -311,7 +311,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -389,7 +389,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -475,7 +475,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& unpack_node = vectorized->node_def( @@ -574,7 +574,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -654,7 +654,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) { *lib.add_function() = outer; 
*lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); // They should be unchanged // We check this somewhat manually as the names of nodes may have changed EXPECT_EQ(vectorized->node_def_size(), 1); @@ -738,7 +738,7 @@ TEST(VectorizeMapDefunTest, VectorizeConst) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized)); @@ -817,7 +817,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); auto const_node = vectorized->node_def( @@ -902,7 +902,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) { *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); auto find_const = [vectorized](int val) -> const NodeDef* { for (const auto& n : vectorized->node_def()) { @@ -924,6 +924,89 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) { EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name())); } +// Before: +// +// +------+ +// +-----------------+ Arg0 +----------------------+ +// | +---+--+ | +// | | | +// | +---v--+ | +// | +-------------+ Arg0 +------------------+ | +// | | +---+--+ | | +// | | | | 
| +// | | | +-----+ | | +// | | | |Const| | | +// | | | +-+---+ | | +// | | | | | | +// | | | +--------+ | | +// | | | | | | +// | | +-v---v-+ | | +// | | | Add | | | +// | | +-+-----+ | | +// | | | | | +// | | | | | +// | | MapDefun +-v----+ | | +// | +---------------| Ret |----------------+ | +// | +--v---+ | +// | | | +// | | | +// | +--v---- | +// +-------------------| Ret |--------------------+ +// +------+ +// +// +// After: +// +// +------+ +// +------------+ Arg0 +----------------------+ +// | +---+--+ | +// | | | +// | | +-----+ | +// | | |Const| | +// | +-v---------+ +--+--+ | +// | |ExpandDims*| | | +// | +-----+-----+ | | +// | | | | +// | +-----+ +-----+ | +// | | | | +// | +-v-v-+ | +// | | Add | | +// | +--+--+ | +// | | | +// | +---v--+ | +// +-----------------------+ Ret +-----------+ +// +------+ +// +TEST(VectorizeMapDefunTest, VectorizeDefunAdd) { + // Note that this checks that the "Add" vectorizer is successful, but does not + // check that the transformed function is correct (i.e. produces the same + // output as the unvectorized map defun). 
For the latter, the tests are in + // tensorflow/python/data/experimental/kernel_tests/optimization/ + // map_vectorization_test.py + FunctionDef inner = FunctionDefHelper::Create( + "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */}, + {/* nodes */ FunctionDefHelper::Const("Const", 2), + {{"Add"}, "Add", {"arg0", "Const:output:0"}, {{"T", DT_INT32}}}}, + {{"ret0", "Add:z:0"}}); + + FunctionDef outer = FunctionDefHelper::Create( + "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"}, + {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}}); + + NodeDef* map_defun = + AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}}, + inner.signature().name(), &outer); + CHECK_NOTNULL(map_defun); + + FunctionDefLibrary lib; + *lib.add_function() = outer; + *lib.add_function() = inner; + FunctionDef* vectorized; + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE( + !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); +} + // TODO(rachelim): More test cases when we get around to implementing them: // [] A badly defined converter, e.g. 
doesn't produce nodes that have the // same number of outputs/inputs as the nodes to be converted diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py index 803ff87924..d1d6cf28ab 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py @@ -80,6 +80,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase): ("Basic", lambda x: (x, x + 1), None), ("Const", lambda x: 2, 12), ("Parallel", lambda x: (x, x + 1), 12), + ("Broadcast", lambda x: x + np.random.rand(5, 4, 3, 2), None), ("Gather", lambda x: array_ops.gather(x, 0), 12), ) def testOptimization(self, map_fn, num_parallel_calls): -- GitLab From a6fcb9d3d81e9207650eda1c899051ccbb97dec7 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 9 Oct 2018 14:38:55 -0700 Subject: [PATCH 151/411] Avoid creating sparse tensor objects before library is initialized. 
PiperOrigin-RevId: 216425002 --- .../sequence_feature_column_test.py | 482 +++++++++--------- 1 file changed, 255 insertions(+), 227 deletions(-) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 929e83523a..707f93b2da 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -39,18 +39,18 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'sparse_input_a': sparse_tensor.SparseTensorValue( + 'sparse_input_args_a': { # example 0, ids [2] # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)), - 'sparse_input_b': sparse_tensor.SparseTensorValue( + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2, 0, 1), + 'dense_shape': (2, 2)}, + 'sparse_input_args_b': { # example 0, ids [1] # example 1, ids [2, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)), + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (1, 2, 0), + 'dense_shape': (2, 2)}, 'expected_input_layer': [ # example 0, ids_a [2], ids_b [1] [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], @@ -58,20 +58,20 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],], 'expected_sequence_length': [1, 2]}, {'testcase_name': '3D', - 'sparse_input_a': sparse_tensor.SparseTensorValue( + 'sparse_input_args_a': { # feature 0, ids [[2], [0, 1]] # feature 1, ids [[0, 0], [1]] - indices=( + 'indices': ( (0, 0, 0), (0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 0, 1), (1, 1, 0)), - values=(2, 0, 1, 0, 0, 1), - dense_shape=(2, 2, 2)), - 'sparse_input_b': sparse_tensor.SparseTensorValue( + 'values': (2, 0, 1, 0, 0, 1), + 'dense_shape': 
(2, 2, 2)}, + 'sparse_input_args_b': { # feature 0, ids [[1, 1], [1]] # feature 1, ids [[2], [0]] - indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), - values=(1, 1, 1, 2, 0), - dense_shape=(2, 2, 2)), + 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + 'values': (1, 1, 1, 2, 0), + 'dense_shape': (2, 2, 2)}, 'expected_input_layer': [ # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]], @@ -80,9 +80,11 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): 'expected_sequence_length': [2, 2]}, ) def test_embedding_column( - self, sparse_input_a, sparse_input_b, expected_input_layer, + self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, expected_sequence_length): + sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a) + sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b) vocabulary_size = 3 embedding_dimension_a = 2 embedding_values_a = ( @@ -261,18 +263,18 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'sparse_input_a': sparse_tensor.SparseTensorValue( + 'sparse_input_args_a': { # example 0, ids [2] # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)), - 'sparse_input_b': sparse_tensor.SparseTensorValue( + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2, 0, 1), + 'dense_shape': (2, 2)}, + 'sparse_input_args_b': { # example 0, ids [1] # example 1, ids [1, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 1, 0), - dense_shape=(2, 2)), + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (1, 1, 0), + 'dense_shape': (2, 2)}, 'expected_input_layer': [ # example 0, ids_a [2], ids_b [1] [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], @@ -280,20 +282,20 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], 
'expected_sequence_length': [1, 2]}, {'testcase_name': '3D', - 'sparse_input_a': sparse_tensor.SparseTensorValue( + 'sparse_input_args_a': { # feature 0, ids [[2], [0, 1]] # feature 1, ids [[0, 0], [1]] - indices=( + 'indices': ( (0, 0, 0), (0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 0, 1), (1, 1, 0)), - values=(2, 0, 1, 0, 0, 1), - dense_shape=(2, 2, 2)), - 'sparse_input_b': sparse_tensor.SparseTensorValue( + 'values': (2, 0, 1, 0, 0, 1), + 'dense_shape': (2, 2, 2)}, + 'sparse_input_args_b': { # feature 0, ids [[1, 1], [1]] # feature 1, ids [[1], [0]] - indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), - values=(1, 1, 1, 1, 0), - dense_shape=(2, 2, 2)), + 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + 'values': (1, 1, 1, 1, 0), + 'dense_shape': (2, 2, 2)}, 'expected_input_layer': [ # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]], @@ -302,8 +304,11 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): 'expected_sequence_length': [2, 2]}, ) def test_indicator_column( - self, sparse_input_a, sparse_input_b, expected_input_layer, + self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, expected_sequence_length): + sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a) + sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b) + vocabulary_size_a = 3 vocabulary_size_b = 2 @@ -350,30 +355,32 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # example 0, values [0., 1] # example 1, [10.] 
- indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)), + 'indices': ((0, 0), (0, 1), (1, 0)), + 'values': (0., 1., 10.), + 'dense_shape': (2, 2)}, 'expected_input_layer': [ [[0.], [1.]], [[10.], [0.]]], 'expected_sequence_length': [2, 1]}, {'testcase_name': '3D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # feature 0, ids [[20, 3], [5]] # feature 1, ids [[3], [8]] - indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), - values=(20, 3, 5., 3., 8.), - dense_shape=(2, 2, 2)), + 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + 'values': (20, 3, 5., 3., 8.), + 'dense_shape': (2, 2, 2)}, 'expected_input_layer': [ [[20.], [3.], [5.], [0.]], [[3.], [0.], [8.], [0.]]], 'expected_sequence_length': [2, 2]}, ) def test_numeric_column( - self, sparse_input, expected_input_layer, expected_sequence_length): + self, sparse_input_args, expected_input_layer, expected_sequence_length): + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) + numeric_column = sfc.sequence_numeric_column('aaa') input_layer, sequence_length = sfc.sequence_input_layer( @@ -387,27 +394,27 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.] # example 1, [10., 11., 12., 13.] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), - (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)), + 'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 8)}, 'expected_input_layer': [ # The output of numeric_column._get_dense_tensor should be flattened. 
[[0., 1., 2., 3.], [4., 5., 6., 7.]], [[10., 11., 12., 13.], [0., 0., 0., 0.]]], 'expected_sequence_length': [2, 1]}, {'testcase_name': '3D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] # example 1, [[10., 11., 12., 13.], []] - indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), - (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), - (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 2, 4)), + 'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), + (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), + (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 2, 4)}, 'expected_input_layer': [ # The output of numeric_column._get_dense_tensor should be flattened. [[0., 1., 2., 3.], [4., 5., 6., 7.]], @@ -415,8 +422,10 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): 'expected_sequence_length': [2, 1]}, ) def test_numeric_column_multi_dim( - self, sparse_input, expected_input_layer, expected_sequence_length): + self, sparse_input_args, expected_input_layer, expected_sequence_length): """Tests sequence_input_layer for multi-dimensional numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) input_layer, sequence_length = sfc.sequence_input_layer( @@ -460,28 +469,29 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), - (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 
11., 12., 13.), - dense_shape=(2, 8)), + 'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 8)}, 'expected_shape': [2, 2, 4]}, {'testcase_name': '3D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] # example 1, [[10., 11., 12., 13.], []] - indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), - (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2), - (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 2, 4)), + 'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), + (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2), + (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 2, 4)}, 'expected_shape': [2, 2, 4]}, ) def test_static_shape_from_tensors_numeric( - self, sparse_input, expected_shape): + self, sparse_input_args, expected_shape): """Tests that we return a known static shape when we have one.""" + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) input_layer, _ = sfc.sequence_input_layer( @@ -492,30 +502,31 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # example 0, ids [2] # example 1, ids [0, 1] # example 2, ids [] # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)), + 'indices': ((0, 0), (1, 0), (1, 1), (3, 0)), + 'values': (2, 0, 1, 1), + 'dense_shape': (4, 2)}, 'expected_shape': [4, 2, 3]}, {'testcase_name': '3D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # example 
0, ids [[2]] # example 1, ids [[0, 1], [2]] # example 2, ids [] # example 3, ids [[1], [0, 2]] - indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), - (3, 0, 0), (3, 1, 0), (3, 1, 1)), - values=(2, 0, 1, 2, 1, 0, 2), - dense_shape=(4, 2, 2)), + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + 'values': (2, 0, 1, 2, 1, 0, 2), + 'dense_shape': (4, 2, 2)}, 'expected_shape': [4, 2, 3]} ) def test_static_shape_from_tensors_indicator( - self, sparse_input, expected_shape): + self, sparse_input_args, expected_shape): """Tests that we return a known static shape when we have one.""" + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=3) indicator_column = fc.indicator_column(categorical_column) @@ -546,11 +557,12 @@ class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': 'rank_lt_3', - 'seq_input': ops.convert_to_tensor(np.arange(100).reshape(10, 10))}, + 'seq_input_arg': np.arange(100).reshape(10, 10)}, {'testcase_name': 'rank_gt_3', - 'seq_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 2, 2))} + 'seq_input_arg': np.arange(100).reshape(5, 5, 2, 2)} ) - def test_sequence_input_throws_error(self, seq_input): + def test_sequence_input_throws_error(self, seq_input_arg): + seq_input = ops.convert_to_tensor(seq_input_arg) context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) context_input = math_ops.cast(context_input, dtype=dtypes.float32) @@ -559,11 +571,12 @@ class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': 'rank_lt_2', - 'context_input': ops.convert_to_tensor(np.arange(100))}, + 'context_input_arg': np.arange(100)}, {'testcase_name': 'rank_gt_2', - 'context_input': 
ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))} + 'context_input_arg': np.arange(100).reshape(5, 5, 4)} ) - def test_context_input_throws_error(self, context_input): + def test_context_input_throws_error(self, context_input_arg): + context_input = ops.convert_to_tensor(context_input_arg) seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) context_input = math_ops.cast(context_input, dtype=dtypes.float32) @@ -657,25 +670,27 @@ class SequenceCategoricalColumnWithIdentityTest( @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)), - 'expected': sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((1, 2, 0), dtype=np.int64), - dense_shape=(2, 2, 1))}, + 'inputs_args': { + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (1, 2, 0), + 'dense_shape': (2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)), + 'values': np.array((1, 2, 0), dtype=np.int64), + 'dense_shape': (2, 2, 1)}}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( - indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), - values=(6, 7, 8), - dense_shape=(2, 2, 2)), - 'expected': sparse_tensor.SparseTensorValue( - indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), - values=(6, 7, 8), - dense_shape=(2, 2, 2))} + 'inputs_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': (6, 7, 8), + 'dense_shape': (2, 2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': (6, 7, 8), + 'dense_shape': (2, 2, 2)}} ) - def test_get_sparse_tensors(self, inputs, expected): + def test_get_sparse_tensors(self, inputs_args, expected_args): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) + expected = sparse_tensor.SparseTensorValue(**expected_args) column = 
sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) @@ -691,27 +706,29 @@ class SequenceCategoricalColumnWithHashBucketTest( @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('omar', 'stringer', 'marlo'), - dense_shape=(2, 2)), - 'expected': sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + 'inputs_args': { + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': ('omar', 'stringer', 'marlo'), + 'dense_shape': (2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)), # Ignored to avoid hash dependence in test. - values=np.array((0, 0, 0), dtype=np.int64), - dense_shape=(2, 2, 1))}, + 'values': np.array((0, 0, 0), dtype=np.int64), + 'dense_shape': (2, 2, 1)}}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( - indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), - values=('omar', 'stringer', 'marlo'), - dense_shape=(2, 2, 2)), - 'expected': sparse_tensor.SparseTensorValue( - indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'inputs_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': ('omar', 'stringer', 'marlo'), + 'dense_shape': (2, 2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), # Ignored to avoid hash dependence in test. 
- values=np.array((0, 0, 0), dtype=np.int64), - dense_shape=(2, 2, 2))} + 'values': np.array((0, 0, 0), dtype=np.int64), + 'dense_shape': (2, 2, 2)}} ) - def test_get_sparse_tensors(self, inputs, expected): + def test_get_sparse_tensors(self, inputs_args, expected_args): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) + expected = sparse_tensor.SparseTensorValue(**expected_args) column = sfc.sequence_categorical_column_with_hash_bucket( 'aaa', hash_bucket_size=10) @@ -742,25 +759,27 @@ class SequenceCategoricalColumnWithVocabularyFileTest( @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)), - 'expected': sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2, 1))}, + 'inputs_args': { + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': ('marlo', 'skywalker', 'omar'), + 'dense_shape': (2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)), + 'values': np.array((2, -1, 0), dtype=np.int64), + 'dense_shape': (2, 2, 1)}}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( - indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), - values=('omar', 'skywalker', 'marlo'), - dense_shape=(2, 2, 2)), - 'expected': sparse_tensor.SparseTensorValue( - indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), - values=np.array((0, -1, 2), dtype=np.int64), - dense_shape=(2, 2, 2))} + 'inputs_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': ('omar', 'skywalker', 'marlo'), + 'dense_shape': (2, 2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': np.array((0, -1, 2), dtype=np.int64), + 'dense_shape': (2, 2, 2)}} ) - def test_get_sparse_tensors(self, inputs, expected): + def test_get_sparse_tensors(self, inputs_args, expected_args): + inputs = 
sparse_tensor.SparseTensorValue(**inputs_args) + expected = sparse_tensor.SparseTensorValue(**expected_args) column = sfc.sequence_categorical_column_with_vocabulary_file( key='aaa', vocabulary_file=self._wire_vocabulary_file_name, @@ -779,25 +798,27 @@ class SequenceCategoricalColumnWithVocabularyListTest( @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)), - 'expected': sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2, 1))}, + 'inputs_args': { + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': ('marlo', 'skywalker', 'omar'), + 'dense_shape': (2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)), + 'values': np.array((2, -1, 0), dtype=np.int64), + 'dense_shape': (2, 2, 1)}}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( - indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), - values=('omar', 'skywalker', 'marlo'), - dense_shape=(2, 2, 2)), - 'expected': sparse_tensor.SparseTensorValue( - indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), - values=np.array((0, -1, 2), dtype=np.int64), - dense_shape=(2, 2, 2))} + 'inputs_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': ('omar', 'skywalker', 'marlo'), + 'dense_shape': (2, 2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': np.array((0, -1, 2), dtype=np.int64), + 'dense_shape': (2, 2, 2)}} ) - def test_get_sparse_tensors(self, inputs, expected): + def test_get_sparse_tensors(self, inputs_args, expected_args): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) + expected = sparse_tensor.SparseTensorValue(**expected_args) column = sfc.sequence_categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -815,14 +836,14 @@ class 
SequenceEmbeddingColumnTest( @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [2] # example 1, ids [0, 1] # example 2, ids [] # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)), + 'indices': ((0, 0), (1, 0), (1, 1), (3, 0)), + 'values': (2, 0, 1, 1), + 'dense_shape': (4, 2)}, 'expected': [ # example 0, ids [2] [[7., 11.], [0., 0.]], @@ -833,15 +854,15 @@ class SequenceEmbeddingColumnTest( # example 3, ids [1] [[3., 5.], [0., 0.]]]}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [[2]] # example 1, ids [[0, 1], [2]] # example 2, ids [] # example 3, ids [[1], [0, 2]] - indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), - (3, 0, 0), (3, 1, 0), (3, 1, 1)), - values=(2, 0, 1, 2, 1, 0, 2), - dense_shape=(4, 2, 2)), + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + 'values': (2, 0, 1, 2, 1, 0, 2), + 'dense_shape': (4, 2, 2)}, 'expected': [ # example 0, ids [[2]] [[7., 11.], [0., 0.]], @@ -852,7 +873,8 @@ class SequenceEmbeddingColumnTest( # example 3, ids [[1], [0, 2]] [[3., 5.], [4., 6.5]]]} ) - def test_get_sequence_dense_tensor(self, inputs, expected): + def test_get_sequence_dense_tensor(self, inputs_args, expected): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) vocabulary_size = 3 embedding_dimension = 2 embedding_values = ( @@ -884,23 +906,24 @@ class SequenceEmbeddingColumnTest( @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [2] # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)), + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2, 0, 1), + 'dense_shape': (2, 2)}, 'expected_sequence_length': [1, 2]}, {'testcase_name': '3D', - 'inputs': 
sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [[2]] # example 1, ids [[0, 1], [2]] - indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), - values=(2, 0, 1, 2), - dense_shape=(2, 2, 2)), + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2, 0, 1, 2), + 'dense_shape': (2, 2, 2)}, 'expected_sequence_length': [1, 2]} ) - def test_sequence_length(self, inputs, expected_sequence_length): + def test_sequence_length(self, inputs_args, expected_sequence_length): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) vocabulary_size = 3 categorical_column = sfc.sequence_categorical_column_with_identity( @@ -1124,14 +1147,14 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [2] # example 1, ids [0, 1] # example 2, ids [] # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)), + 'indices': ((0, 0), (1, 0), (1, 1), (3, 0)), + 'values': (2, 0, 1, 1), + 'dense_shape': (4, 2)}, 'expected': [ # example 0, ids [2] [[0., 0., 1.], [0., 0., 0.]], @@ -1142,15 +1165,15 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase): # example 3, ids [1] [[0., 1., 0.], [0., 0., 0.]]]}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [[2]] # example 1, ids [[0, 1], [2]] # example 2, ids [] # example 3, ids [[1], [2, 2]] - indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), - (3, 0, 0), (3, 1, 0), (3, 1, 1)), - values=(2, 0, 1, 2, 1, 2, 2), - dense_shape=(4, 2, 2)), + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + 'values': (2, 0, 1, 2, 1, 2, 2), + 'dense_shape': (4, 2, 2)}, 'expected': [ # example 0, ids [[2]] [[0., 0., 1.], [0., 0., 0.]], @@ -1161,7 +1184,8 @@ class SequenceIndicatorColumnTest(test.TestCase, 
parameterized.TestCase): # example 3, ids [[1], [2, 2]] [[0., 1., 0.], [0., 0., 2.]]]} ) - def test_get_sequence_dense_tensor(self, inputs, expected): + def test_get_sequence_dense_tensor(self, inputs_args, expected): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) vocabulary_size = 3 categorical_column = sfc.sequence_categorical_column_with_identity( @@ -1176,23 +1200,24 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [2] # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)), + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2, 0, 1), + 'dense_shape': (2, 2)}, 'expected_sequence_length': [1, 2]}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [[2]] # example 1, ids [[0, 1], [2]] - indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), - values=(2, 0, 1, 2), - dense_shape=(2, 2, 2)), + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2, 0, 1, 2), + 'dense_shape': (2, 2, 2)}, 'expected_sequence_length': [1, 2]} ) - def test_sequence_length(self, inputs, expected_sequence_length): + def test_sequence_length(self, inputs_args, expected_sequence_length): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) vocabulary_size = 3 categorical_column = sfc.sequence_categorical_column_with_identity( @@ -1269,27 +1294,28 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, values [0., 1] # example 1, [10.] 
- indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)), + 'indices': ((0, 0), (0, 1), (1, 0)), + 'values': (0., 1., 10.), + 'dense_shape': (2, 2)}, 'expected': [ [[0.], [1.]], [[10.], [0.]]]}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # feature 0, ids [[20, 3], [5]] # feature 1, ids [[3], [8]] - indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), - values=(20, 3, 5., 3., 8.), - dense_shape=(2, 2, 2)), + 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + 'values': (20, 3, 5., 3., 8.), + 'dense_shape': (2, 2, 2)}, 'expected': [ [[20.], [3.], [5.], [0.]], [[3.], [0.], [8.], [0.]]]}, ) - def test_get_sequence_dense_tensor(self, inputs, expected): + def test_get_sequence_dense_tensor(self, inputs_args, expected): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) numeric_column = sfc.sequence_numeric_column('aaa') dense_tensor, _ = numeric_column._get_sequence_dense_tensor( @@ -1335,23 +1361,23 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'sparse_input': sparse_tensor.SparseTensorValue( + 'sparse_input_args': { # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), - (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)), + 'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 8)}, 'expected_dense_tensor': [ [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]}, {'testcase_name': '3D', - 'sparse_input': sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6), - (0, 1, 0), 
(0, 1, 2), (0, 1, 4), (0, 1, 6), - (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 2, 8)), + 'sparse_input_args': { + 'indices': ((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6), + (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6), + (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 2, 8)}, 'expected_dense_tensor': [ [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]], [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]], @@ -1359,8 +1385,9 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase): [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]}, ) def test_get_dense_tensor_multi_dim( - self, sparse_input, expected_dense_tensor): + self, sparse_input_args, expected_dense_tensor): """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) dense_tensor, _ = numeric_column._get_sequence_dense_tensor( @@ -1372,43 +1399,44 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( {'testcase_name': '2D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [2] # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2., 0., 1.), - dense_shape=(2, 2)), + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2., 0., 1.), + 'dense_shape': (2, 2)}, 'expected_sequence_length': [1, 2], 'shape': (1,)}, {'testcase_name': '3D', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [[2]] # example 1, ids [[0, 1], [2]] - indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), - values=(2., 0., 1., 2.), - dense_shape=(2, 2, 2)), + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2., 0., 1., 2.), + 'dense_shape': (2, 2, 2)}, 'expected_sequence_length': [1, 2], 'shape': 
(1,)}, {'testcase_name': '2D_with_shape', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [2] # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2., 0., 1.), - dense_shape=(2, 2)), + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2., 0., 1.), + 'dense_shape': (2, 2)}, 'expected_sequence_length': [1, 1], 'shape': (2,)}, {'testcase_name': '3D_with_shape', - 'inputs': sparse_tensor.SparseTensorValue( + 'inputs_args': { # example 0, ids [[2]] # example 1, ids [[0, 1], [2]] - indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), - values=(2., 0., 1., 2.), - dense_shape=(2, 2, 2)), + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2., 0., 1., 2.), + 'dense_shape': (2, 2, 2)}, 'expected_sequence_length': [1, 2], 'shape': (2,)}, ) - def test_sequence_length(self, inputs, expected_sequence_length, shape): + def test_sequence_length(self, inputs_args, expected_sequence_length, shape): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) numeric_column = sfc.sequence_numeric_column('aaa', shape=shape) _, sequence_length = numeric_column._get_sequence_dense_tensor( -- GitLab From c1093a3757224257fed0f7a1959d0fc99d5c757f Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 9 Oct 2018 15:02:51 -0700 Subject: [PATCH 152/411] In TPUMirroredVariable, when setting _initializer_op and _initial_value attributes, set the attributes of all the contained variables. This fixes a bug that tf.train.init_from_checkpoint doesn't overwrite the initialization values correctly for TPUMirroredVariable. 
PiperOrigin-RevId: 216429476 --- tensorflow/contrib/distribute/python/values.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py index 0dd78ba185..472cb4230c 100644 --- a/tensorflow/contrib/distribute/python/values.py +++ b/tensorflow/contrib/distribute/python/values.py @@ -475,6 +475,11 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase): self._aggregation = aggregation # Needed for GradientTape self._trainable = self._primary_var.trainable + # Typically like `DistributedVariable`, a `TPUMirroredVariable`'s + # initializer is composed of the initializers of the components variables. + # However, in some cases, such as when restoring from a checkpoint, we may + # set the _initializer_op property on the entire `TPUMirroredVariable`. + self._initializer_op = None def _get(self, device=None): """Returns the value for the current device or raises a ValueError.""" @@ -704,8 +709,12 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase): @property def initializer(self): - return control_flow_ops.group( - [v.initializer for v in nest.flatten(self._index)]) + if self._initializer_op: + init_op = self._initializer_op + else: + init_op = control_flow_ops.group( + [v.initializer for v in self._index.values()]) + return init_op @property def graph(self): -- GitLab From 5f69248a692f7b47ea11930621f4f19d0397fe8c Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Tue, 9 Oct 2018 15:07:47 -0700 Subject: [PATCH 153/411] Make defun work under distributed strategies. The core of the change is to have the gradient tape capture distributed variables instead of plain ResourceVariables. In other words, we move the distribution awareness from defun down to tape and rely on distributed variable magic to provide us with the right variable at runtime. In tower context, we always watch the container (e.g. MirroredVariable).
In cross tower context, we always watch all the components. PiperOrigin-RevId: 216430530 --- .../distribute/python/mirrored_strategy.py | 23 +++++--- .../python/mirrored_strategy_multigpu_test.py | 58 +++++++++++++++++++ tensorflow/python/eager/backprop_test.py | 24 ++++++++ tensorflow/python/eager/function.py | 53 ++--------------- tensorflow/python/eager/tape.py | 31 +++++++++- 5 files changed, 128 insertions(+), 61 deletions(-) diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index a32424b316..0f82508428 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -293,7 +293,8 @@ def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs): collections.append(ops.GraphKeys.TRAINABLE_VARIABLES) l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES) for v in index.values(): - l.remove(v) + if v in l: + l.remove(v) g.add_to_collections(collections, result) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result) @@ -461,16 +462,20 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): # name as the absolute name of the variable. 
kwargs["name"] = "%s/replica_%d/" % (var0name, i) # Initialize replicas with the same value: - if context.executing_eagerly(): - kwargs["initial_value"] = array_ops.identity( - index[devices[0]].value()) - else: - def initial_value_fn(device=d): + def initial_value_fn(device=d): + if context.executing_eagerly(): + init_value = index[devices[0]].value() + return array_ops.identity(init_value) + else: with ops.device(device): - return array_ops.identity(index[devices[0]].initial_value) - kwargs["initial_value"] = initial_value_fn + init_value = index[devices[0]].initial_value + return array_ops.identity(init_value) + kwargs["initial_value"] = initial_value_fn with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): - v = next_creator(*args, **kwargs) + # Don't record operations (e.g. other variable reads) during + # variable creation. + with tape.stop_recording(): + v = next_creator(*args, **kwargs) assert not isinstance(v, values.DistributedVariable) index[d] = v return index diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py index eeac528329..ed36639ce8 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py @@ -20,6 +20,8 @@ from __future__ import print_function import sys +import numpy as np + from tensorflow.contrib.distribute.python import mirrored_strategy from tensorflow.contrib.distribute.python import multi_worker_test_base from tensorflow.contrib.distribute.python import strategy_test_lib @@ -34,7 +36,10 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.keras.engine import training as keras_training +from tensorflow.python.keras.layers import core as keras_core 
from tensorflow.python.layers import core +from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell_impl @@ -43,6 +48,8 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import device_util from tensorflow.python.training import distribution_strategy_context +from tensorflow.python.training import gradient_descent +from tensorflow.python.training import optimizer as optimizer_lib from tensorflow.python.training import server_lib @@ -1245,6 +1252,22 @@ class MockModel(object): return x +class MiniModel(keras_training.Model): + """Minimal model for mnist. + + Useful for testing and debugging on slow TPU simulators. + """ + + def __init__(self): + super(MiniModel, self).__init__(name="") + self.fc = keras_core.Dense(1, name="fc", kernel_initializer="ones", + bias_initializer="ones") + + def call(self, inputs, training=True): + inputs = array_ops.ones([1, 10]) + return self.fc(inputs) + + class MirroredStrategyDefunTest(test.TestCase): def _skip_eager_if_gpus_less_than(self, num_gpus): @@ -1365,6 +1388,41 @@ class MirroredStrategyDefunTest(test.TestCase): "GPU:0": 3.0 * 1.25}) self._call_and_check(fn1, [factors], expected_result, [fn1]) + @test_util.run_in_graph_and_eager_modes() + def testTrain(self): + self._skip_eager_if_gpus_less_than(1) + + cpu_dev = device_util.canonicalize("CPU:0") + gpu_dev = device_util.canonicalize("GPU:0") + devices = [cpu_dev, gpu_dev] + dist = mirrored_strategy.MirroredStrategy(devices) + + with dist.scope(): + mock_model = MiniModel() + mock_model.call = function.defun(mock_model.call) + + def loss_fn(ctx): + del ctx + return mock_model(array_ops.ones([1, 10])) + + gradients_fn = backprop.implicit_grad(loss_fn) + gradients_fn = optimizer_lib.get_filtered_grad_fn(gradients_fn) + grads_and_vars = dist.call_for_each_tower( + gradients_fn, None, 
run_concurrently=False) + + optimizer = gradient_descent.GradientDescentOptimizer(0.25) + update_ops = optimizer._distributed_apply(dist, grads_and_vars) # pylint: disable=protected-access + + if not context.executing_eagerly(): + self.evaluate(variables.global_variables_initializer()) + self.evaluate(update_ops) + + updated_var_values = self.evaluate(mock_model.variables) + # All variables start at 1.0 and get two updates of 0.25. + self.assertAllEqual(0.5 * np.ones([10, 1]), updated_var_values[0]) + self.assertAllEqual([0.5], updated_var_values[1]) + + class MultiWorkerMirroredStrategyTest( multi_worker_test_base.MultiWorkerTestBase, diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 7e5c9f3cb6..b1b20fafd2 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -258,6 +258,30 @@ class BackpropTest(test.TestCase): loss += v * v self.assertAllEqual(t.gradient(loss, v), 2.0) + def testAutomaticWatchedVariables(self): + with backprop.GradientTape() as t: + self.assertEqual(0, len(t.watched_variables())) + v = resource_variable_ops.ResourceVariable(1.0) + loss = v * v + self.assertAllEqual([v], t.watched_variables()) + + t.reset() + self.assertEqual(0, len(t.watched_variables())) + loss += v * v + self.assertAllEqual([v], t.watched_variables()) + + def testExplicitWatchedVariables(self): + with backprop.GradientTape() as t: + self.assertEqual(0, len(t.watched_variables())) + v = resource_variable_ops.ResourceVariable(1.0) + t.watch(v) + self.assertAllEqual([v], t.watched_variables()) + + t.reset() + self.assertEqual(0, len(t.watched_variables())) + t.watch(v) + self.assertAllEqual([v], t.watched_variables()) + @test_util.assert_no_new_tensors def testGradientNone(self): diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index ff138cad1e..f1a63adce1 100644 --- a/tensorflow/python/eager/function.py +++ 
b/tensorflow/python/eager/function.py @@ -51,7 +51,6 @@ from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope -from tensorflow.python.training import distribution_strategy_context from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator @@ -202,6 +201,7 @@ class FuncGraph(ops.Graph): # from the default graph even in eager mode. Maybe it should be part of the # eager context? self._distribution_strategy_stack = graph._distribution_strategy_stack + self._variable_creator_stack = graph._variable_creator_stack # Inherit the graph key, since this is used for matching variables in # optimizers. self._graph_key = graph._graph_key @@ -563,17 +563,6 @@ class Function(object): self._func_graph.inputs, self._func_graph.outputs, self._attrs) self._backward_graph_function = None - # Map holding distributed variables, keyed by resource handle tensors. - self._distributed_variables = {} - strategy = distribution_strategy_context.get_distribution_strategy() - for variable in self._func_graph.variables: - # If variable is not distributed, unwrap returns [variable]. - component_variables = strategy.unwrap(variable) - # Only update the dictionary when the variable is actually distributed. - if (len(component_variables) > 1 or component_variables[0] != variable): - for component_variable in component_variables: - self._distributed_variables[component_variable.handle] = variable - def __call__(self, *args): """Executes the wrapped function. 
@@ -602,7 +591,6 @@ class Function(object): if v.trainable: tape.variable_accessed(v) - captures = self._resolve_captured_inputs() tensor_inputs = [] for i, arg in enumerate(nest.flatten(args)): if isinstance(arg, resource_variable_ops.ResourceVariable): @@ -615,9 +603,10 @@ class Function(object): raise ValueError("All inputs to `Function`s must be Tensors; " "on invocation of %s, the %d-th input (%s) was not a " "Tensor." % (self._func_graph.name, i, str(arg))) - args = tensor_inputs + captures + args = tensor_inputs + self._captured_inputs - if tape.should_record(tensor_inputs) or tape.should_record(captures): + if (tape.should_record(tensor_inputs) or + tape.should_record(self._captured_inputs)): return self._backprop_call(args) # Only need to override the gradient in graph mode and when we have outputs. @@ -804,32 +793,6 @@ class Function(object): args, backward_function) return self._build_call_outputs(real_outputs) - def _resolve_captured_inputs(self): - """Resolve captured distributed variables to their current values. - - Some inputs can be distributed variables. Such variables yield a different - component (i.e. actual tf.Variable) variables depending on the context of - execution. - - Returns: - a list of resolved captured input tensors. - """ - if self._distributed_variables: - # Loop over each captured input and check if it corresponds to something - # distributed. If so, get its _distributed_container and fetch the - # component appropriate for the current execution context. - resolved_captured_inputs = self._captured_inputs[:] - for i, captured_input in enumerate(self._captured_inputs): - distributed_var = self._distributed_variables.get(captured_input, None) - if distributed_var is not None: - # distributed variables override __getattr__ and substitute the - # right component variable. In here, `distributed_var.handle` - # actually does the equivalent of - # distributed_var.get_current_component_var().handle. 
- resolved_captured_inputs[i] = distributed_var.handle - return resolved_captured_inputs - return self._captured_inputs - def _build_call_outputs(self, result): """Maps the fdef output list to actual output structure. @@ -1010,14 +973,6 @@ def func_graph_from_py_func(name, for x in _flatten(func_graph.structured_outputs) if x is not None) - # Some captured variables might be components of DistributedValues. - # Instead of storing non-distributed component variables, we - # store their distributed containers so we can retrieve the correct - # component variables at call-time. - strategy = distribution_strategy_context.get_distribution_strategy() - for i, variable in enumerate(variables): - # If variable is not distributed value_container returns itself. - variables[i] = strategy.value_container(variable) func_graph.variables = variables # Register any other functions defined in the graph. diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index 399d90223c..ade945f874 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -21,6 +21,15 @@ from __future__ import print_function import contextlib from tensorflow.python import pywrap_tensorflow +from tensorflow.python.util.lazy_loader import LazyLoader + +# There is a circular dependency between this, ops.py, and +# distribution_strategy_context. +# TODO(b/117329403): Remove this circular dependency. +distribution_strategy_context = LazyLoader( + "distribute_lib", globals(), + "tensorflow.python.training." 
+ "distribution_strategy_context") class Tape(object): @@ -52,12 +61,28 @@ def watch(tape, tensor): def watch_variable(tape, variable): """Marks this variable to be watched by the given tape.""" - pywrap_tensorflow.TFE_Py_TapeWatchVariable(tape._tape, variable) # pylint: disable=protected-access + strategy = distribution_strategy_context.get_distribution_strategy() + if distribution_strategy_context.get_tower_context(): + variables = [strategy.value_container(variable)] + else: + variables = strategy.unwrap(variable) + for var in variables: + pywrap_tensorflow.TFE_Py_TapeWatchVariable(tape._tape, var) # pylint: disable=protected-access def variable_accessed(variable): - """Notifies all tapes in the stack that a variable has been accessed.""" - pywrap_tensorflow.TFE_Py_TapeVariableAccessed(variable) + """Notifies all tapes in the stack that a variable has been accessed. + + Args: + variable: variable to be watched. + """ + strategy = distribution_strategy_context.get_distribution_strategy() + if distribution_strategy_context.get_tower_context(): + variables = [strategy.value_container(variable)] + else: + variables = strategy.unwrap(variable) + for var in variables: + pywrap_tensorflow.TFE_Py_TapeVariableAccessed(var) def pop_tape(tape): -- GitLab From 771955e2b8be98a0b38fada41bd67f663397c87d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 15:18:21 -0700 Subject: [PATCH 154/411] Raises an appropriate error if `add_weight` is called on a Keras network. 
PiperOrigin-RevId: 216432358 --- tensorflow/python/keras/engine/network.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index 5969fea2b2..266c48d304 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -432,6 +432,27 @@ class Network(base_layer.Layer): 'assign variables to attributes and they will show up in the weights ' 'and variables properties.') + def add_weight(self, + name, + shape, + dtype=None, + initializer=None, + regularizer=None, + trainable=None, + constraint=None, + partitioner=None, + use_resource=None, + synchronization=variables.VariableSynchronization.AUTO, + aggregation=variables.VariableAggregation.NONE, + **kwargs): + if self._is_graph_network: + raise NotImplementedError('`add_weight` is not supported on Networks.') + else: + raise NotImplementedError( + '`add_weight` is not supported on Networks. However, you may ' + 'assign variables to attributes and they will show up in the weights ' + 'and variables properties.') + def add_loss(self, *args, **kwargs): if context.executing_eagerly(): raise NotImplementedError('`add_loss` is not supported on Networks ' -- GitLab From 69c4a426fc4a3afd83c8190467b07c17b8b2ed60 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 9 Oct 2018 15:47:56 -0700 Subject: [PATCH 155/411] [XLA] Allow scatter to share the operand buffer with the output This avoids a copy. 
PiperOrigin-RevId: 216437329 --- .../xla/service/hlo_dataflow_analysis.cc | 1 + .../xla/service/hlo_dataflow_analysis_test.cc | 38 +++++++++++++++++++ .../xla/service/tuple_points_to_analysis.cc | 1 + .../service/tuple_points_to_analysis_test.cc | 38 +++++++++++++++++++ 4 files changed, 78 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index c22adcdd8d..71122e73b1 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -1048,6 +1048,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser( } if (user->opcode() == HloOpcode::kDynamicUpdateSlice || + user->opcode() == HloOpcode::kScatter || user->opcode() == HloOpcode::kWhile) { // We eliminated other users in BufferLiveness::live_range_strictly_before, // so here we just need to check that the use is at operand index 0. diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 510d6360a1..d27786d160 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -2283,6 +2283,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) { dataflow_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {})); } +TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) { + const char* hlo_text = R"( + HloModule TensorFlowScatterV1 + + update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) + } + + ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + updates = s32[2,3] parameter(2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + } + 
)"; + TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); + computation_ = module_->entry_computation(); + RunAnalysis(); + + HloInstruction* operand_param = computation_->parameter_instruction(0); + HloInstruction* indices_param = computation_->parameter_instruction(1); + HloInstruction* updates_param = computation_->parameter_instruction(2); + HloInstruction* scatter = computation_->root_instruction(); + + EXPECT_TRUE(dataflow_analysis_->CanShareOperandBufferWithUser( + operand_param, {}, scatter, {})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser( + indices_param, {}, scatter, {})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser( + updates_param, {}, scatter, {})); +} + TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { auto builder = HloComputation::Builder(TestName()); diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index 811ac55e2d..ef4e69180d 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -756,6 +756,7 @@ bool TuplePointsToAnalysis::CanShareOperandBufferWithUser( } } if (user->opcode() == HloOpcode::kDynamicUpdateSlice || + user->opcode() == HloOpcode::kScatter || user->opcode() == HloOpcode::kWhile) { // We eliminated other users in BufferLiveness::live_range_strictly_before, // so here we just need to check that the use is at operand index 0. 
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index e9a07b14ed..a571bd571b 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -1010,6 +1010,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) { points_to_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {})); } +TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) { + const char* hlo_text = R"( + HloModule TensorFlowScatterV1 + + update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) + } + + ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + updates = s32[2,3] parameter(2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + } + )"; + TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); + computation_ = module_->entry_computation(); + RunAnalysis(); + + HloInstruction* operand_param = computation_->parameter_instruction(0); + HloInstruction* indices_param = computation_->parameter_instruction(1); + HloInstruction* updates_param = computation_->parameter_instruction(2); + HloInstruction* scatter = computation_->root_instruction(); + + EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser( + operand_param, {}, scatter, {})); + EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser( + indices_param, {}, scatter, {})); + EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser( + updates_param, {}, scatter, {})); +} + TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { auto builder = HloComputation::Builder(TestName()); -- GitLab From c98ffffcb4e0cc668c0ff7b73d51677a7eb7dcf4 Mon Sep 17 00:00:00 2001 From: Francois Chollet 
Date: Tue, 9 Oct 2018 16:19:46 -0700 Subject: [PATCH 156/411] Part 2/3 of the update of tf.keras to the Keras 2.2.4 API. PiperOrigin-RevId: 216442569 --- tensorflow/python/keras/backend.py | 64 ++++++---- tensorflow/python/keras/callbacks.py | 101 +++++++++++---- tensorflow/python/keras/callbacks_test.py | 118 ++++++++++++++++-- .../python/keras/layers/convolutional.py | 14 ++- .../python/keras/layers/convolutional_test.py | 36 ++++++ tensorflow/python/kernel_tests/rnn_test.py | 4 +- .../golden/v1/tensorflow.keras.backend.pbtxt | 2 +- ...flow.keras.callbacks.-early-stopping.pbtxt | 6 +- ...orflow.keras.callbacks.-tensor-board.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling2-d.pbtxt | 2 +- .../golden/v2/tensorflow.keras.backend.pbtxt | 2 +- ...flow.keras.callbacks.-early-stopping.pbtxt | 6 +- ...orflow.keras.callbacks.-tensor-board.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling2-d.pbtxt | 2 +- 14 files changed, 296 insertions(+), 65 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 13f52fbae7..7509ef9c59 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -2338,7 +2338,8 @@ def permute_dimensions(x, pattern): @tf_export('keras.backend.resize_images') -def resize_images(x, height_factor, width_factor, data_format): +def resize_images(x, height_factor, width_factor, data_format, + interpolation='nearest'): """Resizes the images contained in a 4D tensor. Arguments: @@ -2346,40 +2347,55 @@ def resize_images(x, height_factor, width_factor, data_format): height_factor: Positive integer. width_factor: Positive integer. data_format: One of `"channels_first"`, `"channels_last"`. + interpolation: A string, one of `nearest` or `bilinear`. Returns: A tensor. Raises: - ValueError: if `data_format` is neither - `channels_last` or `channels_first`. + ValueError: in case of incorrect value for + `data_format` or `interpolation`. 
""" if data_format == 'channels_first': - original_shape = int_shape(x) - new_shape = array_ops.shape(x)[2:] - new_shape *= constant_op.constant( - np.array([height_factor, width_factor]).astype('int32')) + rows, cols = 2, 3 + elif data_format == 'channels_last': + rows, cols = 1, 2 + else: + raise ValueError('Invalid `data_format` argument: %s' % (data_format,)) + + original_shape = int_shape(x) + new_shape = array_ops.shape(x)[rows:cols + 1] + new_shape *= constant_op.constant( + np.array([height_factor, width_factor], dtype='int32')) + + if data_format == 'channels_first': x = permute_dimensions(x, [0, 2, 3, 1]) + if interpolation == 'nearest': x = image_ops.resize_nearest_neighbor(x, new_shape) + elif interpolation == 'bilinear': + x = image_ops.resize_bilinear(x, new_shape) + else: + raise ValueError('interpolation should be one ' + 'of "nearest" or "bilinear".') + if data_format == 'channels_first': x = permute_dimensions(x, [0, 3, 1, 2]) - x.set_shape((None, None, original_shape[2] * height_factor - if original_shape[2] is not None else None, - original_shape[3] * width_factor - if original_shape[3] is not None else None)) - return x - elif data_format == 'channels_last': - original_shape = int_shape(x) - new_shape = array_ops.shape(x)[1:3] - new_shape *= constant_op.constant( - np.array([height_factor, width_factor]).astype('int32')) - x = image_ops.resize_nearest_neighbor(x, new_shape) - x.set_shape((None, original_shape[1] * height_factor - if original_shape[1] is not None else None, - original_shape[2] * width_factor - if original_shape[2] is not None else None, None)) - return x + + if original_shape[rows] is None: + new_height = None else: - raise ValueError('Invalid data_format: ' + str(data_format)) + new_height = original_shape[rows] * height_factor + + if original_shape[cols] is None: + new_width = None + else: + new_width = original_shape[cols] * width_factor + + if data_format == 'channels_first': + output_shape = (None, None, new_height, 
new_width) + else: + output_shape = (None, new_height, new_width, None) + x.set_shape(output_shape) + return x @tf_export('keras.backend.resize_volumes') diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 3d6000f223..4c12c83a4c 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -24,6 +24,7 @@ from collections import Iterable from collections import OrderedDict import copy import csv +import io import json import math import os @@ -606,24 +607,28 @@ class EarlyStopping(Callback): """Stop training when a monitored quantity has stopped improving. Arguments: - monitor: quantity to be monitored. - min_delta: minimum change in the monitored quantity + monitor: Quantity to be monitored. + min_delta: Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement. - patience: number of epochs with no improvement + patience: Number of epochs with no improvement after which training will be stopped. verbose: verbosity mode. - mode: one of {auto, min, max}. In `min` mode, + mode: One of `{"auto", "min", "max"}`. In `min` mode, training will stop when the quantity monitored has stopped decreasing; in `max` mode it will stop when the quantity monitored has stopped increasing; in `auto` mode, the direction is automatically inferred from the name of the monitored quantity. - baseline: baseline value for the monitored quantity. + baseline: Baseline value for the monitored quantity. Training will stop if the model doesn't show improvement over the baseline. + restore_best_weights: Whether to restore model weights from + the epoch with the best value of the monitored quantity. + If False, the model weights obtained at the last step of + training are used. 
""" def __init__(self, @@ -632,7 +637,8 @@ class EarlyStopping(Callback): patience=0, verbose=0, mode='auto', - baseline=None): + baseline=None, + restore_best_weights=False): super(EarlyStopping, self).__init__() self.monitor = monitor @@ -642,6 +648,8 @@ class EarlyStopping(Callback): self.min_delta = abs(min_delta) self.wait = 0 self.stopped_epoch = 0 + self.restore_best_weights = restore_best_weights + self.best_weights = None if mode not in ['auto', 'min', 'max']: logging.warning('EarlyStopping mode %s is unknown, ' @@ -673,25 +681,37 @@ class EarlyStopping(Callback): self.best = np.Inf if self.monitor_op == np.less else -np.Inf def on_epoch_end(self, epoch, logs=None): - current = logs.get(self.monitor) + current = self.get_monitor_value(logs) if current is None: - logging.warning('Early stopping conditioned on metric `%s` ' - 'which is not available. Available metrics are: %s', - self.monitor, ','.join(list(logs.keys()))) return if self.monitor_op(current - self.min_delta, self.best): self.best = current self.wait = 0 + if self.restore_best_weights: + self.best_weights = self.model.get_weights() else: self.wait += 1 if self.wait >= self.patience: self.stopped_epoch = epoch self.model.stop_training = True + if self.restore_best_weights: + if self.verbose > 0: + print('Restoring model weights from the end of the best epoch.') + self.model.set_weights(self.best_weights) def on_train_end(self, logs=None): if self.stopped_epoch > 0 and self.verbose > 0: print('Epoch %05d: early stopping' % (self.stopped_epoch + 1)) + def get_monitor_value(self, logs): + logs = logs or {} + monitor_value = logs.get(self.monitor) + if monitor_value is None: + logging.warning('Early stopping conditioned on metric `%s` ' + 'which is not available. 
Available metrics are: %s', + self.monitor, ','.join(list(logs.keys()))) + return monitor_value + @tf_export('keras.callbacks.RemoteMonitor') class RemoteMonitor(Callback): @@ -839,6 +859,12 @@ class TensorBoard(Callback): `embeddings_layer_names`. Numpy array (if the model has a single input) or list of Numpy arrays (if the model has multiple inputs). Learn [more about embeddings](https://www.tensorflow.org/programmers_guide/embedding) + update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`, + writes the losses and metrics to TensorBoard after each batch. + The same applies for `'epoch'`. If using an integer, let's say `1000`, + the callback will write the metrics and losses to TensorBoard every + 1000 samples. Note that writing too frequently to TensorBoard + can slow down your training. Raises: ValueError: If histogram_freq is set and no validation data is provided. @@ -862,7 +888,8 @@ class TensorBoard(Callback): embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None, - embeddings_data=None): + embeddings_data=None, + update_freq='epoch'): super(TensorBoard, self).__init__() self.log_dir = log_dir self.histogram_freq = histogram_freq @@ -882,6 +909,12 @@ class TensorBoard(Callback): self.embeddings_layer_names = embeddings_layer_names self.embeddings_metadata = embeddings_metadata self.embeddings_data = embeddings_data + if update_freq == 'batch': + self.update_freq = 1 + else: + self.update_freq = update_freq + self._samples_seen = 0 + self._samples_seen_at_last_write = 0 def _init_writer(self): """Sets file writer.""" @@ -1045,13 +1078,17 @@ class TensorBoard(Callback): # use v2 summary ops with self.writer.as_default(), summary_ops_v2.always_record_summaries(): for name, value in logs.items(): - summary_ops_v2.scalar(name, value.item(), step=step) + if isinstance(value, np.ndarray): + value = value.item() + summary_ops_v2.scalar(name, value, step=step) else: # use FileWriter from v1 summary for name, value in logs.items(): + 
if isinstance(value, np.ndarray): + value = value.item() summary = tf_summary.Summary() summary_value = summary.value.add() - summary_value.simple_value = value.item() + summary_value.simple_value = value summary_value.tag = name self.writer.add_summary(summary, step) self.writer.flush() @@ -1076,10 +1113,14 @@ class TensorBoard(Callback): """Writes scalar summaries for metrics on every training batch.""" # Don't output batch_size and batch number as Tensorboard summaries logs = logs or {} - batch_logs = {('batch_' + k): v - for k, v in logs.items() - if k not in ['batch', 'size', 'num_steps']} - self._write_custom_summaries(self._total_batches_seen, batch_logs) + self._samples_seen += logs.get('size', 1) + samples_seen_since = self._samples_seen - self._samples_seen_at_last_write + if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq: + batch_logs = {('batch_' + k): v + for k, v in logs.items() + if k not in ['batch', 'size', 'num_steps']} + self._write_custom_summaries(self._total_batches_seen, batch_logs) + self._samples_seen_at_last_write = self._samples_seen self._total_batches_seen += 1 def on_epoch_begin(self, epoch, logs=None): @@ -1103,7 +1144,11 @@ class TensorBoard(Callback): logs = {('epoch_' + k): v for k, v in logs.items() if k not in ['batch', 'size', 'num_steps']} - self._write_custom_summaries(epoch, logs) + if self.update_freq == 'epoch': + step = epoch + else: + step = self._samples_seen + self._write_custom_summaries(step, logs) # pop the histogram summary op after each epoch if self.histogram_freq: @@ -1309,7 +1354,12 @@ class CSVLogger(Callback): self.writer = None self.keys = None self.append_header = True - self.file_flags = 'b' if six.PY2 and os.name == 'nt' else '' + if six.PY2: + self.file_flags = 'b' + self._open_args = {} + else: + self.file_flags = '' + self._open_args = {'newline': '\n'} super(CSVLogger, self).__init__() def on_train_begin(self, logs=None): @@ -1317,9 +1367,12 @@ class CSVLogger(Callback): if 
os.path.exists(self.filename): with open(self.filename, 'r' + self.file_flags) as f: self.append_header = not bool(len(f.readline())) - self.csv_file = open(self.filename, 'a' + self.file_flags) + mode = 'a' else: - self.csv_file = open(self.filename, 'w' + self.file_flags) + mode = 'w' + self.csv_file = io.open(self.filename, + mode + self.file_flags, + **self._open_args) def on_epoch_end(self, epoch, logs=None): logs = logs or {} @@ -1345,9 +1398,13 @@ class CSVLogger(Callback): class CustomDialect(csv.excel): delimiter = self.sep + fieldnames = ['epoch'] + self.keys + if six.PY2: + fieldnames = [unicode(x) for x in fieldnames] + self.writer = csv.DictWriter( self.csv_file, - fieldnames=['epoch'] + self.keys, + fieldnames=fieldnames, dialect=CustomDialect) if self.append_header: self.writer.writeheader() diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index 467bc4cdc4..bb85347033 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -313,6 +313,42 @@ class KerasCallbacksTest(test.TestCase): hist = model.fit(data, labels, callbacks=[stopper], verbose=0, epochs=20) assert len(hist.epoch) >= patience + def test_EarlyStopping_final_weights_when_restoring_model_weights(self): + + class DummyModel(object): + + def __init__(self): + self.stop_training = False + self.weights = -1 + + def get_weights(self): + return self.weights + + def set_weights(self, weights): + self.weights = weights + + def set_weight_to_epoch(self, epoch): + self.weights = epoch + + early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', + patience=2, + restore_best_weights=True) + early_stop.model = DummyModel() + losses = [0.2, 0.15, 0.1, 0.11, 0.12] + # The best configuration is in the epoch 2 (loss = 0.1000). 
+ epochs_trained = 0 + early_stop.on_train_begin() + for epoch in range(len(losses)): + epochs_trained += 1 + early_stop.model.set_weight_to_epoch(epoch=epoch) + early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) + if early_stop.model.stop_training: + break + # The best configuration is in epoch 2 (loss = 0.1000), + # and while patience = 2, we're restoring the best weights, + # so we end up at the epoch with the best weights, i.e. epoch 2 + self.assertEqual(early_stop.model.get_weights(), 2) + def test_RemoteMonitor(self): if requests is None: return @@ -534,11 +570,15 @@ class KerasCallbacksTest(test.TestCase): batch_size=BATCH_SIZE, validation_data=(x_test, y_test), callbacks=cbks, - epochs=1, + epochs=2, verbose=0) with open(filepath) as csvfile: - output = ' '.join(csvfile.readlines()) + list_lines = csvfile.readlines() + for line in list_lines: + assert line.count(sep) == 4 + assert len(list_lines) == 5 + output = ' '.join(list_lines) assert len(re.findall('epoch', output)) == 1 os.remove(filepath) @@ -1115,11 +1155,11 @@ class KerasCallbacksTest(test.TestCase): temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - tb_cbk = keras.callbacks.TensorBoard(temp_dir) + tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch') tb_cbk.writer = FileWriterStub(temp_dir) for batch in range(5): - tb_cbk.on_batch_end(batch, {'acc': np.float32(batch)}) + tb_cbk.on_batch_end(batch, {'acc': batch}) self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4]) self.assertEqual(tb_cbk.writer.summary_values, [0., 1., 2., 3., 4.]) self.assertEqual(tb_cbk.writer.summary_tags, ['batch_acc'] * 5) @@ -1147,14 +1187,17 @@ class KerasCallbacksTest(test.TestCase): temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) - tb_cbk = keras.callbacks.TensorBoard(temp_dir) + tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch') tb_cbk.writer = FileWriterStub(temp_dir) - 
tb_cbk.on_batch_end(0, {'acc': np.float32(5.0)}) - tb_cbk.on_epoch_end(0, {'acc': np.float32(10.0)}) + tb_cbk.on_batch_end(0, {'acc': 5.0}) batch_step, batch_summary = tb_cbk.writer.batch_summary self.assertEqual(batch_step, 0) self.assertEqual(batch_summary.value[0].simple_value, 5.0) + + tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='epoch') + tb_cbk.writer = FileWriterStub(temp_dir) + tb_cbk.on_epoch_end(0, {'acc': 10.0}) epoch_step, epoch_summary = tb_cbk.writer.epoch_summary self.assertEqual(epoch_step, 0) self.assertEqual(epoch_summary.value[0].simple_value, 10.0) @@ -1192,6 +1235,66 @@ class KerasCallbacksTest(test.TestCase): self.assertTrue(os.path.exists(temp_dir)) + def test_TensorBoard_update_freq(self): + + class FileWriterStub(object): + + def __init__(self, logdir, graph=None): + self.logdir = logdir + self.graph = graph + self.batch_summaries = [] + self.epoch_summaries = [] + + def add_summary(self, summary, step): + if 'batch_' in summary.value[0].tag: + self.batch_summaries.append((step, summary)) + elif 'epoch_' in summary.value[0].tag: + self.epoch_summaries.append((step, summary)) + + def flush(self): + pass + + def close(self): + pass + + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) + + # Epoch mode + tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='epoch') + tb_cbk.writer = FileWriterStub(temp_dir) + + tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1}) + self.assertEqual(tb_cbk.writer.batch_summaries, []) + tb_cbk.on_epoch_end(0, {'acc': 10.0, 'size': 1}) + self.assertEqual(len(tb_cbk.writer.epoch_summaries), 1) + + # Batch mode + tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch') + tb_cbk.writer = FileWriterStub(temp_dir) + + tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1}) + self.assertEqual(len(tb_cbk.writer.batch_summaries), 1) + tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1}) + self.assertEqual(len(tb_cbk.writer.batch_summaries), 2) + 
self.assertFalse(tb_cbk.writer.epoch_summaries) + + # Integer mode + tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq=20) + tb_cbk.writer = FileWriterStub(temp_dir) + + tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10}) + self.assertFalse(tb_cbk.writer.batch_summaries) + tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10}) + self.assertEqual(len(tb_cbk.writer.batch_summaries), 1) + tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10}) + self.assertEqual(len(tb_cbk.writer.batch_summaries), 1) + tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10}) + self.assertEqual(len(tb_cbk.writer.batch_summaries), 2) + tb_cbk.on_batch_end(0, {'acc': 10.0, 'size': 10}) + self.assertEqual(len(tb_cbk.writer.batch_summaries), 2) + self.assertFalse(tb_cbk.writer.epoch_summaries) + def test_RemoteMonitorWithJsonPayload(self): if requests is None: self.skipTest('`requests` required to run this test') @@ -1226,6 +1329,7 @@ class KerasCallbacksTest(test.TestCase): def test_fit_generator_with_callback(self): class TestCallback(keras.callbacks.Callback): + def set_model(self, model): # Check the model operations for the optimizer operations that # the _make_train_function adds under a named scope for the diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py index 8f5872385c..58024677ee 100644 --- a/tensorflow/python/keras/layers/convolutional.py +++ b/tensorflow/python/keras/layers/convolutional.py @@ -1951,6 +1951,7 @@ class UpSampling2D(Layer): It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". + interpolation: A string, one of `nearest` or `bilinear`. 
Input shape: 4D tensor with shape: @@ -1967,10 +1968,18 @@ class UpSampling2D(Layer): `(batch, channels, upsampled_rows, upsampled_cols)` """ - def __init__(self, size=(2, 2), data_format=None, **kwargs): + def __init__(self, + size=(2, 2), + data_format=None, + interpolation='nearest', + **kwargs): super(UpSampling2D, self).__init__(**kwargs) self.data_format = conv_utils.normalize_data_format(data_format) self.size = conv_utils.normalize_tuple(size, 2, 'size') + if interpolation not in {'nearest', 'bilinear'}: + raise ValueError('`interpolation` argument should be one of `"nearest"` ' + 'or `"bilinear"`.') + self.interpolation = interpolation self.input_spec = InputSpec(ndim=4) def compute_output_shape(self, input_shape): @@ -1992,7 +2001,8 @@ class UpSampling2D(Layer): def call(self, inputs): return backend.resize_images( - inputs, self.size[0], self.size[1], self.data_format) + inputs, self.size[0], self.size[1], self.data_format, + interpolation=self.interpolation) def get_config(self): config = {'size': self.size, 'data_format': self.data_format} diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index f88d632ab5..bdc175b8b9 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/layers/convolutional_test.py @@ -789,6 +789,42 @@ class UpSamplingTest(test.TestCase): np.testing.assert_allclose(np_output, expected_out) + @tf_test_util.run_in_graph_and_eager_modes + def test_upsampling_2d_bilinear(self): + num_samples = 2 + stack_size = 2 + input_num_row = 11 + input_num_col = 12 + for data_format in ['channels_first', 'channels_last']: + if data_format == 'channels_first': + inputs = np.random.rand(num_samples, stack_size, input_num_row, + input_num_col) + else: + inputs = np.random.rand(num_samples, input_num_row, input_num_col, + stack_size) + + testing_utils.layer_test(keras.layers.UpSampling2D, + kwargs={'size': (2, 2), + 'data_format': data_format, 
+ 'interpolation': 'bilinear'}, + input_shape=inputs.shape) + + if not context.executing_eagerly(): + for length_row in [2]: + for length_col in [2, 3]: + layer = keras.layers.UpSampling2D( + size=(length_row, length_col), + data_format=data_format) + layer.build(inputs.shape) + outputs = layer(keras.backend.variable(inputs)) + np_output = keras.backend.eval(outputs) + if data_format == 'channels_first': + self.assertEqual(np_output.shape[2], length_row * input_num_row) + self.assertEqual(np_output.shape[3], length_col * input_num_col) + else: + self.assertEqual(np_output.shape[1], length_row * input_num_row) + self.assertEqual(np_output.shape[2], length_col * input_num_col) + @tf_test_util.run_in_graph_and_eager_modes def test_upsampling_3d(self): num_samples = 2 diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py index 2f6963f6b8..907e1277a9 100644 --- a/tensorflow/python/kernel_tests/rnn_test.py +++ b/tensorflow/python/kernel_tests/rnn_test.py @@ -571,8 +571,8 @@ class RNNTest(test.TestCase): cell.set_weights(tf_weights) [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train}) - self.assertAllClose(tf_out, k_out) - self.assertAllClose(tf_state, k_state) + self.assertAllClose(tf_out, k_out, atol=1e-5) + self.assertAllClose(tf_state, k_state, atol=1e-5) def testBasicLSTMCellInterchangeWithLSTMCell(self): with self.session(graph=ops_lib.Graph()) as sess: diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt index 9feb7c09b8..5f0dfd7ae7 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt @@ -386,7 +386,7 @@ tf_module { } member_method { name: "resize_images" - argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'x\', \'height_factor\', 
\'width_factor\', \'data_format\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'nearest\'], " } member_method { name: "resize_volumes" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt index f71292856c..ed0f37647f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt @@ -5,7 +5,11 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\'], " + argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\'], " + } + member_method { + name: "get_monitor_value" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "on_batch_begin" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt index e58ba18c1c..e9d53b7225 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', 
\'False\', \'0\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\', \'update_freq\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\', \'epoch\'], " } member_method { name: "on_batch_begin" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt index 40a56a0c94..b05e5ec84d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -82,7 +82,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'size\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\'], " + argspec: "args=[\'self\', \'size\', \'data_format\', \'interpolation\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\', \'nearest\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt index 9feb7c09b8..5f0dfd7ae7 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt @@ -386,7 +386,7 @@ tf_module { } member_method { name: "resize_images" - argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'nearest\'], " } member_method { name: "resize_volumes" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt index f71292856c..ed0f37647f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt @@ -5,7 +5,11 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\'], " + argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\'], " + } + member_method { + name: "get_monitor_value" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "on_batch_begin" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt index e58ba18c1c..e9d53b7225 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', 
\'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\', \'update_freq\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\', \'epoch\'], " } member_method { name: "on_batch_begin" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt index 40a56a0c94..b05e5ec84d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -82,7 +82,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'size\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\'], " + argspec: "args=[\'self\', \'size\', \'data_format\', \'interpolation\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\', \'nearest\'], " } member_method { name: "add_loss" -- GitLab From 86777950480e10bc43b36facc478e2d706f23852 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Tue, 9 Oct 2018 16:21:56 -0700 Subject: [PATCH 157/411] Internal change PiperOrigin-RevId: 216442906 --- tensorflow/contrib/lite/build_def.bzl | 18 +++++++++--------- tensorflow/contrib/lite/testing/BUILD | 5 ++--- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 05efee18e7..f962a138f7 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -310,15 +310,8 @@ def generated_test_models_failing(conversion_mode): if conversion_mode == "toco-flex": # TODO(b/117328698): Fix and enable the known flex failures. 
return [ - "arg_min_max", - "div", - "floor_div", - "gather", "lstm", - "resize_bilinear", - "space_to_batch_nd", "split", - "transpose", "unpack", ] @@ -334,7 +327,8 @@ def generated_test_models_all(): """Generates a list of all tests with the different converters. Returns: - List of tuples representing (conversion mode, name of test). + List of tuples representing: + (conversion mode, name of test, test tags, test args). """ conversion_modes = generated_test_conversion_modes() tests = generated_test_models() @@ -343,12 +337,18 @@ def generated_test_models_all(): failing_tests = generated_test_models_failing(conversion_mode) for test in tests: tags = [] + args = [] if test in failing_tests: tags.append("notap") tags.append("manual") if conversion_mode: test += "_%s" % conversion_mode - options.append((conversion_mode, test, tags)) + + # Flex conversion shouldn't suffer from the same conversion bugs + # listed for the default TFLite kernel backend. + if conversion_mode == "toco-flex": + args.append("--ignore_known_bugs=false") + options.append((conversion_mode, test, tags, args)) return options def gen_zip_test(name, test_name, conversion_mode, **kwargs): diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 2edd420fea..3dc666f631 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -20,8 +20,7 @@ load( name = "zip_test_%s" % test_name, size = "large", srcs = ["generated_examples_zip_test.cc"], - args = [ - ] + select({ + args = args + select({ "//tensorflow:android": [], "//conditions:default": [ "--zip_file_path=$(location :zip_%s)" % test_name, @@ -61,7 +60,7 @@ load( "//tensorflow/core:android_tensorflow_test_lib", ], }), -) for conversion_mode, test_name, tags in generated_test_models_all()] +) for conversion_mode, test_name, tags, args in generated_test_models_all()] test_suite( name = "generated_zip_tests", -- GitLab From 2f5ebc0ea5e6d500ea8cd925234c569d6b32fd4e Mon Sep 
17 00:00:00 2001 From: Sanjoy Das Date: Tue, 9 Oct 2018 16:22:22 -0700 Subject: [PATCH 158/411] [TF:XLA] Bump open source abseil revision to 445998d7ac4e5d3c50411d377e3b50e960d2d6c2 PiperOrigin-RevId: 216442983 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 40c226a861..b03af53cff 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -117,11 +117,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_google_absl", build_file = clean_dep("//third_party:com_google_absl.BUILD"), - sha256 = "f186bf5d9fce3037c602a21f86facbdd317adecef36e1726ec7bc7b496943a82", - strip_prefix = "abseil-cpp-e821380d69a549dc64900693942789d21aa4df5e", + sha256 = "cd1650daecfdd5591502bb017c70777c959cf604a962352bd5312bef8d78a8c6", + strip_prefix = "abseil-cpp-445998d7ac4e5d3c50411d377e3b50e960d2d6c2", urls = [ - "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz", - "https://github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz", + "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz", + "https://github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz", ], ) -- GitLab From 6c391166b8b6ba43d2b0151e6fb9cf14864131a2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 16:23:35 -0700 Subject: [PATCH 159/411] Add 'remove' operation to MutableHashTable and MutableDenseHashTable. 
PiperOrigin-RevId: 216443201 --- .../linear_optimizer/python/ops/sdca_ops.py | 3 +- .../ops/sharded_mutable_dense_hashtable.py | 2 + .../sharded_mutable_dense_hashtable_test.py | 6 + tensorflow/contrib/lookup/lookup_ops.py | 81 ++++- tensorflow/contrib/lookup/lookup_ops_test.py | 336 +++++++++++++++--- .../python/timeseries/math_utils.py | 19 +- .../python/timeseries/math_utils_test.py | 8 +- .../python/timeseries/state_management.py | 1 + .../api_def_LookupTableRemoveV2.pbtxt | 24 ++ tensorflow/core/framework/lookup_interface.cc | 8 + tensorflow/core/framework/lookup_interface.h | 17 + .../core/kernels/initializable_lookup_table.h | 6 + tensorflow/core/kernels/lookup_table_op.cc | 184 +++++++++- .../core/ops/compat/ops_history.v1.pbtxt | 20 ++ tensorflow/core/ops/lookup_ops.cc | 14 + 15 files changed, 643 insertions(+), 86 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 48ac429701..b5099a0bf6 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -152,7 +152,8 @@ class SdcaModel(object): default_value=[0.0, 0.0, 0.0, 0.0], # SdcaFprint never returns 0 or 1 for the low64 bits, so this a safe # empty_key (that will never collide with actual payloads). 
- empty_key=[0, 0]) + empty_key=[0, 0], + deleted_key=[1, 1]) summary.scalar('approximate_duality_gap', self.approximate_duality_gap()) summary.scalar('examples_seen', self._hashtable.size()) diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py index 5015fb0848..44a869f7c2 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py @@ -48,6 +48,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface): value_dtype, default_value, empty_key, + deleted_key, num_shards=1, checkpoint=True, name='ShardedMutableHashTable'): @@ -62,6 +63,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface): value_dtype=value_dtype, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, checkpoint=checkpoint, name='%s-%d-of-%d' % (name, i + 1, num_shards))) self._table_shards = table_shards diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py index 553b116a3b..2b56d0fa3a 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py @@ -33,6 +33,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): with self.cached_session(): default_val = -1 empty_key = 0 + deleted_key = -1 keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = ShardedMutableDenseHashTable( @@ -40,6 +41,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.int64, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) @@ -56,6 +58,7 @@ 
class ShardedMutableDenseHashTableTest(TensorFlowTestCase): with self.cached_session(): default_val = [-0.1, 0.2] empty_key = [0, 1] + deleted_key = [1, 0] keys = constant_op.constant([[11, 12], [13, 14], [15, 16]], dtypes.int64) values = constant_op.constant([[0.5, 0.6], [1.5, 1.6], [2.5, 2.6]], @@ -65,6 +68,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.float32, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) @@ -81,6 +85,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): def testExportSharded(self): with self.cached_session(): empty_key = -2 + deleted_key = -3 default_val = -1 num_shards = 2 keys = constant_op.constant([10, 11, 12], dtypes.int64) @@ -90,6 +95,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.int64, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index f83765a48d..5abef822e8 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -292,8 +292,8 @@ def index_to_string(tensor, mapping, default_value="UNK", name=None): class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): """A generic mutable hash table implementation. - Data can be inserted by calling the insert method. It does not support - initialization via the init method. + Data can be inserted by calling the insert method and removed by calling the + remove method. It does not support initialization via the init method. Example usage: @@ -391,6 +391,34 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): with ops.colocate_with(self._table_ref): return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name) + def remove(self, keys, name=None): + """Removes `keys` and its associated values from the table. 
+ + If a key is not present in the table, it is silently ignored. + + Args: + keys: Keys to remove. Can be a tensor of any shape. Must match the table's + key type. + name: A name for the operation (optional). + + Returns: + The created Operation. + + Raises: + TypeError: when `keys` do not match the table data types. + """ + if keys.dtype != self._key_dtype: + raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." % + (self._key_dtype, keys.dtype)) + + with ops.name_scope(name, "%s_lookup_table_remove" % self._name, + (self._table_ref, keys, self._default_value)) as name: + # pylint: disable=protected-access + op = gen_lookup_ops.lookup_table_remove_v2( + self._table_ref, keys, name=name) + + return op + def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. @@ -487,11 +515,11 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): """A generic mutable hash table implementation using tensors as backing store. - Data can be inserted by calling the insert method. It does not support - initialization via the init method. + Data can be inserted by calling the insert method and removed by calling the + remove method. It does not support initialization via the init method. It uses "open addressing" with quadratic reprobing to resolve collisions. - Compared to `MutableHashTable` the insert and lookup operations in a + Compared to `MutableHashTable` the insert, remove and lookup operations in a `MutableDenseHashTable` are typically faster, but memory usage can be higher. However, `MutableDenseHashTable` does not require additional memory for temporary tensors created during checkpointing and restore operations. 
@@ -502,7 +530,9 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): table = tf.contrib.lookup.MutableDenseHashTable(key_dtype=tf.int64, value_dtype=tf.int64, default_value=-1, - empty_key=0) + empty_key=0, + deleted_key=-1) + sess.run(table.insert(keys, values)) out = table.lookup(query_keys) print(out.eval()) @@ -516,6 +546,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): value_dtype, default_value, empty_key, + deleted_key, initial_num_buckets=None, shared_name=None, name="MutableDenseHashTable", @@ -530,7 +561,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): value_dtype: the type of the value tensors. default_value: The value to use if a key is missing in the table. empty_key: the key to use to represent empty buckets internally. Must not - be used in insert or lookup operations. + be used in insert, remove or lookup operations. initial_num_buckets: the initial number of buckets. shared_name: If non-empty, this table will be shared under the given name across multiple sessions. @@ -538,9 +569,12 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): checkpoint: if True, the contents of the table are saved to and restored from checkpoints. If `shared_name` is empty for a checkpointed table, it is shared using the table node name. + deleted_key: the key to use to represent deleted buckets internally. Must + not be used in insert, remove or lookup operations and be different from + the empty_key. Returns: - A `MutableHashTable` object. + A `MutableDenseHashTable` object. Raises: ValueError: If checkpoint is True and no name was specified. 
@@ -555,6 +589,8 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): use_node_name_sharing = checkpoint and shared_name is None empty_key = ops.convert_to_tensor( empty_key, dtype=key_dtype, name="empty_key") + deleted_key = ops.convert_to_tensor( + deleted_key, dtype=key_dtype, name="deleted_key") executing_eagerly = context.executing_eagerly() if executing_eagerly and shared_name is None: # TODO(allenl): This will leak memory due to kernel caching by the @@ -564,6 +600,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): shared_name = "table_%d" % (ops.uid(),) self._table_ref = gen_lookup_ops.mutable_dense_hash_table_v2( empty_key=empty_key, + deleted_key=deleted_key, shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, value_dtype=value_dtype, @@ -648,6 +685,34 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): self._table_ref, keys, values, name=name) return op + def remove(self, keys, name=None): + """Removes `keys` and its associated values from the table. + + If a key is not present in the table, it is silently ignored. + + Args: + keys: Keys to remove. Can be a tensor of any shape. Must match the table's + key type. + name: A name for the operation (optional). + + Returns: + The created Operation. + + Raises: + TypeError: when `keys` do not match the table data types. + """ + if keys.dtype != self._key_dtype: + raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." % + (self._key_dtype, keys.dtype)) + + with ops.name_scope(name, "%s_lookup_table_remove" % self._name, + (self._table_ref, keys, self._default_value)) as name: + # pylint: disable=protected-access + op = gen_lookup_ops.lookup_table_remove_v2( + self._table_ref, keys, name=name) + + return op + def export(self, name=None): """Returns tensors of all keys and values in the table. 
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py index 9e9345e875..35b0d1bc44 100644 --- a/tensorflow/contrib/lookup/lookup_ops_test.py +++ b/tensorflow/contrib/lookup/lookup_ops_test.py @@ -303,13 +303,17 @@ class MutableHashTableOpTest(test.TestCase): def testMutableHashTable(self): with self.cached_session(): default_val = -1 - keys = constant_op.constant(["brain", "salad", "surgery"]) - values = constant_op.constant([0, 1, 2], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"]) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["tarkus", "tank"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant(["brain", "salad", "tank"]) @@ -472,13 +476,18 @@ class MutableHashTableOpTest(test.TestCase): def testMutableHashTableOfTensors(self): with self.cached_session(): default_val = constant_op.constant([-1, -1], dtypes.int64) - keys = constant_op.constant(["brain", "salad", "surgery"]) - values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"]) + values = constant_op.constant([[0, 1], [2, 3], [4, 5], [6, 7]], + dtypes.int64) table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["tarkus", "tank"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant(["brain", "salad", "tank"]) @@ -624,6 +633,26 @@ class MutableHashTableOpTest(test.TestCase): result = 
output.eval() self.assertAllEqual([0, 1, 3, -1], result) + def testMutableHashTableRemoveHighRank(self): + with self.test_session(): + default_val = -1 + keys = constant_op.constant([["brain", "salad"], ["surgery", "tank"]]) + values = constant_op.constant([[0, 1], [2, 3]], dtypes.int64) + table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) + + table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["salad", "tarkus"]) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + + input_string = constant_op.constant(["brain", "salad", "tank", "tarkus"]) + output = table.lookup(input_string) + + result = output.eval() + self.assertAllEqual([0, -1, 3, -1], result) + def testMutableHashTableOfTensorsFindHighRank(self): with self.cached_session(): default_val = constant_op.constant([-1, -1, -1], dtypes.int64) @@ -645,6 +674,30 @@ class MutableHashTableOpTest(test.TestCase): self.assertAllEqual( [[[0, 1, 2], [2, 3, 4]], [[-1, -1, -1], [-1, -1, -1]]], result) + def testMutableHashTableOfTensorsRemoveHighRank(self): + with self.test_session(): + default_val = constant_op.constant([-1, -1, -1], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery"]) + values = constant_op.constant([[0, 1, 2], [2, 3, 4], [4, 5, 6]], + dtypes.int64) + table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) + + table.insert(keys, values).run() + self.assertAllEqual(3, table.size().eval()) + + remove_string = constant_op.constant([["brain", "tank"]]) + table.remove(remove_string).run() + self.assertAllEqual(2, table.size().eval()) + + input_string = constant_op.constant([["brain", "salad"], + ["surgery", "tank"]]) + output = table.lookup(input_string) + self.assertAllEqual([2, 2, 3], output.get_shape()) + + result = output.eval() + self.assertAllEqual( + [[[-1, -1, -1], [2, 3, 4]], [[4, 5, 6], [-1, -1, -1]]], result) + def 
testMultipleMutableHashTables(self): with self.cached_session() as sess: default_val = -1 @@ -792,13 +845,22 @@ class MutableDenseHashTableOpTest(test.TestCase): def testBasic(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([12, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant([11, 12, 15], dtypes.int64) @@ -806,17 +868,26 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual([3], output.get_shape()) result = output.eval() - self.assertAllEqual([0, 1, -1], result) + self.assertAllEqual([0, -1, -1], result) def testBasicBool(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([True, True, True], dtypes.bool) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([True, True, True, True], dtypes.bool) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.bool, default_value=False, empty_key=0) + dtypes.int64, + dtypes.bool, + default_value=False, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([11, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant([11, 
12, 15], dtypes.int64) @@ -824,14 +895,30 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual([3], output.get_shape()) result = output.eval() - self.assertAllEqual([True, True, False], result) + self.assertAllEqual([False, True, False], result) + + def testSameEmptyAndDeletedKey(self): + with self.cached_session(): + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=42) + self.assertAllEqual(0, table.size().eval()) def testLookupUnknownShape(self): with self.cached_session(): keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) table.insert(keys, values).run() self.assertAllEqual(3, table.size().eval()) @@ -844,45 +931,60 @@ class MutableDenseHashTableOpTest(test.TestCase): def testMapStringToFloat(self): with self.cached_session(): - keys = constant_op.constant(["a", "b", "c"], dtypes.string) - values = constant_op.constant([0.0, 1.1, 2.2], dtypes.float32) + + keys = constant_op.constant(["a", "b", "c", "d"], dtypes.string) + values = constant_op.constant([0.0, 1.1, 2.2, 3.3], dtypes.float32) default_value = constant_op.constant(-1.5, dtypes.float32) table = lookup.MutableDenseHashTable( dtypes.string, dtypes.float32, default_value=default_value, - empty_key="") + empty_key="", + deleted_key="$") self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["b", "e"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) - input_string = constant_op.constant(["a", "b", "d"], dtypes.string) + input_string = 
constant_op.constant(["a", "b", "d", "e"], dtypes.string) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + self.assertAllEqual([4], output.get_shape()) result = output.eval() - self.assertAllClose([0, 1.1, -1.5], result) + self.assertAllClose([0, -1.5, 3.3, -1.5], result) def testMapInt64ToFloat(self): for float_dtype in [dtypes.float32, dtypes.float64]: with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0.0, 1.1, 2.2], float_dtype) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0.0, 1.1, 2.2, 3.3], float_dtype) default_value = constant_op.constant(-1.5, float_dtype) table = lookup.MutableDenseHashTable( - dtypes.int64, float_dtype, default_value=default_value, empty_key=0) + dtypes.int64, + float_dtype, + default_value=default_value, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([12, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) - input_string = constant_op.constant([11, 12, 15], dtypes.int64) + input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + self.assertAllEqual([4], output.get_shape()) result = output.eval() - self.assertAllClose([0, 1.1, -1.5], result) + self.assertAllClose([0, -1.5, 3.3, -1.5], result) def testVectorValues(self): with self.cached_session(): @@ -895,6 +997,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=0, + deleted_key=-1, initial_num_buckets=4) self.assertAllEqual(0, table.size().eval()) @@ -908,26 +1011,35 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(4, table.size().eval()) self.assertAllEqual(8, 
len(table.export()[0].eval())) - input_string = constant_op.constant([11, 12, 15], dtypes.int64) + remove_string = constant_op.constant([12, 16], dtypes.int64) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + self.assertAllEqual(8, len(table.export()[0].eval())) + + input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual( - [3, 4], output.shape, msg="Saw shape: %s" % output.shape) + self.assertAllEqual([4, 4], + output.shape, + msg="Saw shape: %s" % output.shape) result = output.eval() - self.assertAllEqual([[0, 1, 2, 3], [3, 4, 5, 6], [-1, -2, -3, -4]], - result) + self.assertAllEqual( + [[0, 1, 2, 3], [-1, -2, -3, -4], [2, 3, 4, 5], [-1, -2, -3, -4]], + result) def testVectorKeys(self): with self.cached_session(): keys = constant_op.constant([[0, 1], [1, 2], [1, 3]], dtypes.int64) values = constant_op.constant([10, 11, 12], dtypes.int64) empty_key = constant_op.constant([0, 3], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, initial_num_buckets=8) self.assertAllEqual(0, table.size().eval()) @@ -940,13 +1052,18 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(4, table.size().eval()) self.assertAllEqual(8, len(table.export()[0].eval())) - input_string = constant_op.constant([[0, 1], [1, 2], [0, 2]], + remove_string = constant_op.constant([[1, 2], [7, 8]], dtypes.int64) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + self.assertAllEqual(8, len(table.export()[0].eval())) + + input_string = constant_op.constant([[0, 1], [1, 2], [1, 3], [0, 2]], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + self.assertAllEqual([4], output.get_shape()) 
result = output.eval() - self.assertAllEqual([10, 11, -1], result) + self.assertAllEqual([10, -1, 12, -1], result) def testResize(self): with self.cached_session(): @@ -957,6 +1074,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=0, + deleted_key=-1, initial_num_buckets=4) self.assertAllEqual(0, table.size().eval()) @@ -964,31 +1082,42 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(4, len(table.export()[0].eval())) - keys2 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64) - values2 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64) + keys2 = constant_op.constant([12, 99], dtypes.int64) + table.remove(keys2).run() + self.assertAllEqual(2, table.size().eval()) + self.assertAllEqual(4, len(table.export()[0].eval())) + + keys3 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64) + values3 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64) - table.insert(keys2, values2).run() - self.assertAllEqual(7, table.size().eval()) + table.insert(keys3, values3).run() + self.assertAllEqual(6, table.size().eval()) self.assertAllEqual(16, len(table.export()[0].eval())) - keys3 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18], + keys4 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18], dtypes.int64) - output = table.lookup(keys3) - self.assertAllEqual([-1, 0, 1, 3, 4, 5, 6, 7, -1], output.eval()) + output = table.lookup(keys4) + self.assertAllEqual([-1, 0, -1, 3, 4, 5, 6, 7, -1], output.eval()) def testExport(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([1, 2, 3], dtypes.int64) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([1, 2, 3, 4], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=-1, empty_key=100, + deleted_key=200, initial_num_buckets=8) 
self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + keys2 = constant_op.constant([12, 15], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) exported_keys, exported_values = table.export() @@ -1005,8 +1134,8 @@ class MutableDenseHashTableOpTest(test.TestCase): pairs = np.dstack((np_keys.flatten(), np_values.flatten()))[0] # sort by key pairs = pairs[pairs[:, 0].argsort()] - self.assertAllEqual([[11, 1], [12, 2], [13, 3], [100, 0], [100, 0], - [100, 0], [100, 0], [100, 0]], pairs) + self.assertAllEqual([[11, 1], [13, 3], [14, 4], [100, 0], [100, 0], + [100, 0], [100, 0], [200, 2]], pairs) def testSaveRestore(self): save_dir = os.path.join(self.get_temp_dir(), "save_restore") @@ -1015,13 +1144,15 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: default_value = -1 empty_key = 0 - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + deleted_key = -1 + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1030,6 +1161,11 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([12, 15], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1043,6 +1179,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, 
name="t1", checkpoint=True, initial_num_buckets=64) @@ -1062,7 +1199,7 @@ class MutableDenseHashTableOpTest(test.TestCase): input_string = constant_op.constant([10, 11, 12, 13, 14], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([-1, 0, 1, 2, -1], output.eval()) + self.assertAllEqual([-1, 0, -1, 2, 3], output.eval()) @test_util.run_in_graph_and_eager_modes def testObjectSaveRestore(self): @@ -1071,6 +1208,7 @@ class MutableDenseHashTableOpTest(test.TestCase): default_value = -1 empty_key = 0 + deleted_key = -1 keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) save_table = lookup.MutableDenseHashTable( @@ -1078,6 +1216,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1097,6 +1236,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=64) @@ -1124,14 +1264,18 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-2, -3], dtypes.int64) default_value = constant_op.constant([-1, -2], dtypes.int64) - keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64) - values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64) + keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]], + dtypes.int64) + values = constant_op.constant([[0, 1], [2, 3], [2, 4], [4, 5]], + dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1140,6 +1284,11 @@ class 
MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([[12, 13], [16, 17]], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1149,12 +1298,14 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-2, -3], dtypes.int64) default_value = constant_op.constant([-1, -2], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=64) @@ -1184,14 +1335,17 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) - keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]], + dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t2", checkpoint=True, initial_num_buckets=32) @@ -1200,6 +1354,11 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([[12, 13], [15, 16]], 
dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1209,12 +1368,14 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t2", checkpoint=True, initial_num_buckets=64) @@ -1235,7 +1396,7 @@ class MutableDenseHashTableOpTest(test.TestCase): input_string = constant_op.constant( [[11, 12], [11, 14], [11, 15], [13, 14], [13, 15]], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([0, 1, -1, 2, -1], output.eval()) + self.assertAllEqual([0, 1, -1, 3, -1], output.eval()) def testReprobe(self): with self.cached_session(): @@ -1248,6 +1409,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=0, + deleted_key=-1, initial_num_buckets=8) self.assertAllEqual(0, table.size().eval()) @@ -1267,7 +1429,11 @@ class MutableDenseHashTableOpTest(test.TestCase): keys = constant_op.constant([11, 0, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=12) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=12, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() @@ -1283,19 +1449,35 @@ class MutableDenseHashTableOpTest(test.TestCase): def testErrors(self): with self.cached_session(): table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) # Inserting the empty key returns an 
error - keys = constant_op.constant([11, 0], dtypes.int64) - values = constant_op.constant([0, 1], dtypes.int64) + keys1 = constant_op.constant([11, 0], dtypes.int64) + values1 = constant_op.constant([0, 1], dtypes.int64) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "empty_key"): - table.insert(keys, values).run() + table.insert(keys1, values1).run() # Looking up the empty key returns an error with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "empty_key"): - table.lookup(keys).eval() + table.lookup(keys1).eval() + + # Inserting the deleted key returns an error + keys2 = constant_op.constant([11, -1], dtypes.int64) + values2 = constant_op.constant([0, 1], dtypes.int64) + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table.insert(keys2, values2).run() + + # Looking up the deleted key returns an error + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table.lookup(keys2).eval() # Arbitrary tensors of keys are not supported keys = constant_op.constant([[11, 0], [12, 1]], dtypes.int64) @@ -1312,11 +1494,43 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=17, + deleted_key=-1, initial_num_buckets=12) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "Number of buckets must be"): self.assertAllEqual(0, table2.size().eval()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Empty and deleted keys must have same shape"): + table3 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=[1, 2]) + self.assertAllEqual(0, table3.size().eval()) + + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "Empty and deleted keys cannot be equal"): + table4 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=42) + self.assertAllEqual(0, table4.size().eval()) + + with
self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "Empty and deleted keys cannot be equal"): + table5 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=[1, 2, 3], + deleted_key=[1, 2, 3]) + self.assertAllEqual(0, table5.size().eval()) + class IndexTableFromFile(test.TestCase): @@ -2558,7 +2772,11 @@ class MutableDenseHashTableBenchmark(MutableHashTableBenchmark): def _create_table(self): return lookup.MutableDenseHashTable( - dtypes.int64, dtypes.float32, default_value=0.0, empty_key=-1) + dtypes.int64, + dtypes.float32, + default_value=0.0, + empty_key=-1, + deleted_key=-2) if __name__ == "__main__": diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 03da2b82e5..9c585fe6a7 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -543,20 +543,25 @@ class TupleOfTensorsLookup(lookup.LookupInterface): overhead. 
""" - def __init__( - self, key_dtype, default_values, empty_key, name, checkpoint=True): + def __init__(self, + key_dtype, + default_values, + empty_key, + deleted_key, + name, + checkpoint=True): default_values_flat = nest.flatten(default_values) - self._hash_tables = nest.pack_sequence_as( - default_values, - [TensorValuedMutableDenseHashTable( + self._hash_tables = nest.pack_sequence_as(default_values, [ + TensorValuedMutableDenseHashTable( key_dtype=key_dtype, value_dtype=default_value.dtype.base_dtype, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name=name + "_{}".format(table_number), checkpoint=checkpoint) - for table_number, default_value - in enumerate(default_values_flat)]) + for table_number, default_value in enumerate(default_values_flat) + ]) self._name = name def lookup(self, keys): diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py index c0de42b15b..91265b9b2e 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py @@ -223,10 +223,12 @@ class TestLookupTable(test.TestCase): hash_table = math_utils.TupleOfTensorsLookup( key_dtype=dtypes.int64, default_values=[[ - array_ops.ones([3, 2], dtype=dtypes.float32), array_ops.zeros( - [5], dtype=dtypes.float64) - ], array_ops.ones([7, 7], dtype=dtypes.int64)], + array_ops.ones([3, 2], dtype=dtypes.float32), + array_ops.zeros([5], dtype=dtypes.float64) + ], + array_ops.ones([7, 7], dtype=dtypes.int64)], empty_key=-1, + deleted_key=-2, name="test_lookup") def stack_tensor(base_tensor): return array_ops.stack([base_tensor + 1, base_tensor + 2]) diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_management.py b/tensorflow/contrib/timeseries/python/timeseries/state_management.py index 13eecd4d82..138406c616 100644 --- 
a/tensorflow/contrib/timeseries/python/timeseries/state_management.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_management.py @@ -149,6 +149,7 @@ class ChainingStateManager(_OverridableStateManager): key_dtype=dtypes.int64, default_values=self._start_state, empty_key=-1, + deleted_key=-2, name="cached_states", checkpoint=self._checkpoint_state) diff --git a/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt new file mode 100644 index 0000000000..333fe6f4b2 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt @@ -0,0 +1,24 @@ +op { + graph_op_name: "LookupTableRemoveV2" + visibility: HIDDEN + endpoint { + name: "LookupTableRemove" + } + in_arg { + name: "table_handle" + description: <(); + + mutex_lock l(mu_); + for (int64 i = 0; i < key_values.size(); ++i) { + table_.erase(SubtleMustCopyIfIntegral(key_values(i))); + } + return Status::OK(); + } + Status ImportValues(OpKernelContext* ctx, const Tensor& keys, const Tensor& values) override { return DoInsert(true, keys, values); @@ -212,6 +222,16 @@ class MutableHashTableOfTensors final : public LookupInterface { return DoInsert(false, keys, values); } + Status Remove(OpKernelContext* ctx, const Tensor& keys) override { + const auto key_values = keys.flat(); + + mutex_lock l(mu_); + for (int64 i = 0; i < key_values.size(); ++i) { + table_.erase(SubtleMustCopyIfIntegral(key_values(i))); + } + return Status::OK(); + } + Status ImportValues(OpKernelContext* ctx, const Tensor& keys, const Tensor& values) override { return DoInsert(true, keys, values); @@ -326,6 +346,29 @@ class MutableDenseHashTable final : public LookupInterface { empty_key_input->template shaped({1, key_shape_.num_elements()}), 0); + const Tensor* deleted_key_input; + OP_REQUIRES_OK(ctx, ctx->input("deleted_key", &deleted_key_input)); + OP_REQUIRES(ctx, key_shape_.IsSameSize(deleted_key_input->shape()), + 
errors::InvalidArgument( + "Empty and deleted keys must have same shape, got shapes: ", + key_shape_.DebugString(), " and ", + deleted_key_input->shape().DebugString())); + deleted_key_ = PersistentTensor(*deleted_key_input); + deleted_key_hash_ = HashKey(deleted_key_input->template shaped( + {1, key_shape_.num_elements()}), + 0); + + if (empty_key_hash_ == deleted_key_hash_) { + const int64 key_size = key_shape_.num_elements(); + const auto empty_key_matrix = + empty_key_.AccessTensor(ctx)->template shaped({1, key_size}); + const auto deleted_key_matrix = + deleted_key_.AccessTensor(ctx)->template shaped({1, key_size}); + OP_REQUIRES( + ctx, !IsEqualKey(empty_key_matrix, 0, deleted_key_matrix, 0), + errors::InvalidArgument("Empty and deleted keys cannot be equal")); + } + int64 initial_num_buckets; OP_REQUIRES_OK(ctx, GetNodeAttr(kernel->def(), "initial_num_buckets", &initial_num_buckets)); @@ -360,6 +403,8 @@ class MutableDenseHashTable final : public LookupInterface { value_buckets_.AccessTensor(ctx)->template matrix(); const auto empty_key_matrix = empty_key_.AccessTensor(ctx)->template shaped({1, key_size}); + const auto deleted_key_matrix = + deleted_key_.AccessTensor(ctx)->template shaped({1, key_size}); const int64 bit_mask = num_buckets_ - 1; // TODO(andreasst): parallelize using work_sharder for (int64 i = 0; i < num_elements; ++i) { @@ -369,6 +414,11 @@ class MutableDenseHashTable final : public LookupInterface { return errors::InvalidArgument( "Using the empty_key as a table key is not allowed"); } + if (deleted_key_hash_ == key_hash && + IsEqualKey(deleted_key_matrix, 0, key_matrix, i)) { + return errors::InvalidArgument( + "Using the deleted_key as a table key is not allowed"); + } int64 bucket_index = key_hash & bit_mask; int64 num_probes = 0; while (true) { @@ -425,23 +475,40 @@ class MutableDenseHashTable final : public LookupInterface { return DoInsert(ctx, key, value, false); } + Status Remove(OpKernelContext* ctx, const Tensor& key) override + 
LOCKS_EXCLUDED(mu_) { + if (key.NumElements() != key.dim_size(0) * key_shape_.num_elements()) { + TensorShape expected_shape({key.dim_size(0)}); + expected_shape.AppendShape(key_shape_); + return errors::InvalidArgument("Expected key shape ", + expected_shape.DebugString(), " got ", + key.shape().DebugString()); + } + mutex_lock l(mu_); + return DoRemove(ctx, key); + } + Status ImportValues(OpKernelContext* ctx, const Tensor& keys, const Tensor& values) override LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); num_buckets_ = keys.dim_size(0); key_buckets_ = PersistentTensor(keys); value_buckets_ = PersistentTensor(values); - // Count the number of keys that are not the empty_key. This requires - // iterating through the whole table but that is OK as we only execute it - // during checkpoint restore. + // Count the number of keys that are not the empty_key or deleted_key. + // This requires iterating through the whole table but that is OK as we + // only execute it during checkpoint restore. num_entries_ = 0; const auto empty_key_tensor = empty_key_.AccessTensor(ctx)->template shaped( {1, key_shape_.num_elements()}); + const auto deleted_key_tensor = + deleted_key_.AccessTensor(ctx)->template shaped( + {1, key_shape_.num_elements()}); const auto key_buckets_tensor = key_buckets_.AccessTensor(ctx)->template matrix(); for (int64 i = 0; i < num_buckets_; ++i) { - if (!IsEqualKey(key_buckets_tensor, i, empty_key_tensor, 0)) { + if (!IsEqualKey(key_buckets_tensor, i, empty_key_tensor, 0) && + !IsEqualKey(key_buckets_tensor, i, deleted_key_tensor, 0)) { ++num_entries_; } } @@ -498,7 +565,8 @@ class MutableDenseHashTable final : public LookupInterface { private: Status DoInsert(OpKernelContext* ctx, const Tensor& key, const Tensor& value, - bool ignore_empty_key) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + bool ignore_empty_and_deleted_key) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { const int64 num_elements = (key.dims() == 0) ? 
1 : key.dim_size(0); const int64 value_size = value_shape_.num_elements(); const int64 key_size = key_shape_.num_elements(); @@ -511,17 +579,27 @@ class MutableDenseHashTable final : public LookupInterface { value_buckets_.AccessTensor(ctx)->template matrix(); const auto empty_key_tensor = empty_key_.AccessTensor(ctx)->template shaped({1, key_size}); + const auto deleted_key_tensor = + deleted_key_.AccessTensor(ctx)->template shaped({1, key_size}); const int64 bit_mask = num_buckets_ - 1; for (int64 i = 0; i < num_elements; ++i) { const uint64 key_hash = HashKey(key_matrix, i); if (empty_key_hash_ == key_hash && IsEqualKey(empty_key_tensor, 0, key_matrix, i)) { - if (ignore_empty_key) { + if (ignore_empty_and_deleted_key) { continue; } return errors::InvalidArgument( "Using the empty_key as a table key is not allowed"); } + if (deleted_key_hash_ == key_hash && + IsEqualKey(deleted_key_tensor, 0, key_matrix, i)) { + if (ignore_empty_and_deleted_key) { + continue; + } + return errors::InvalidArgument( + "Using the deleted_key as a table key is not allowed"); + } int64 bucket_index = key_hash & bit_mask; int64 num_probes = 0; while (true) { @@ -532,7 +610,9 @@ class MutableDenseHashTable final : public LookupInterface { } break; } - if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0)) { + if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0) || + IsEqualKey(key_buckets_matrix, bucket_index, deleted_key_tensor, + 0)) { ++num_entries_; for (int64 j = 0; j < key_size; ++j) { key_buckets_matrix(bucket_index, j) = @@ -556,6 +636,59 @@ class MutableDenseHashTable final : public LookupInterface { return Status::OK(); } + Status DoRemove(OpKernelContext* ctx, const Tensor& key) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + const int64 num_elements = key.dim_size(0); + const int64 key_size = key_shape_.num_elements(); + const auto key_matrix = key.shaped({num_elements, key_size}); + + auto key_buckets_matrix = + key_buckets_.AccessTensor(ctx)->template 
matrix(); + const auto empty_key_tensor = + empty_key_.AccessTensor(ctx)->template shaped({1, key_size}); + const auto deleted_key_tensor = + deleted_key_.AccessTensor(ctx)->template shaped({1, key_size}); + const auto deleted_key_flat = + deleted_key_.AccessTensor(ctx)->template flat(); + const int64 bit_mask = num_buckets_ - 1; + for (int64 i = 0; i < num_elements; ++i) { + const uint64 key_hash = HashKey(key_matrix, i); + if (empty_key_hash_ == key_hash && + IsEqualKey(empty_key_tensor, 0, key_matrix, i)) { + return errors::InvalidArgument( + "Using the empty_key as a table key is not allowed"); + } + if (deleted_key_hash_ == key_hash && + IsEqualKey(deleted_key_tensor, 0, key_matrix, i)) { + return errors::InvalidArgument( + "Using the deleted_key as a table key is not allowed"); + } + int64 bucket_index = key_hash & bit_mask; + int64 num_probes = 0; + while (true) { + if (IsEqualKey(key_buckets_matrix, bucket_index, key_matrix, i)) { + --num_entries_; + for (int64 j = 0; j < key_size; ++j) { + key_buckets_matrix(bucket_index, j) = + SubtleMustCopyIfIntegral(deleted_key_flat(j)); + } + break; + } + if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0)) { + break; + } + ++num_probes; + bucket_index = + (bucket_index + num_probes) & bit_mask; // quadratic probing + if (num_probes >= num_buckets_) { + return errors::Internal( + "Internal error in MutableDenseHashTable remove"); + } + } + } + return Status::OK(); + } + Status AllocateBuckets(OpKernelContext* ctx, int64 new_num_buckets) EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (new_num_buckets < 4 || @@ -639,7 +772,9 @@ class MutableDenseHashTable final : public LookupInterface { PersistentTensor value_buckets_ GUARDED_BY(mu_); PersistentTensor empty_key_; uint64 empty_key_hash_; -}; + PersistentTensor deleted_key_; + uint64 deleted_key_hash_; +}; // namespace lookup } // namespace lookup @@ -717,6 +852,39 @@ REGISTER_KERNEL_BUILDER(Name("LookupTableInsert").Device(DEVICE_CPU), 
REGISTER_KERNEL_BUILDER(Name("LookupTableInsertV2").Device(DEVICE_CPU), LookupTableInsertOp); +// Table remove op. +class LookupTableRemoveOp : public OpKernel { + public: + explicit LookupTableRemoveOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + lookup::LookupInterface* table; + OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); + core::ScopedUnref unref_me(table); + + DataType expected_input_0 = + (ctx->input_dtype(0) == DT_RESOURCE) ? DT_RESOURCE : DT_STRING_REF; + DataTypeVector expected_inputs = {expected_input_0, table->key_dtype()}; + OP_REQUIRES_OK(ctx, ctx->MatchSignature(expected_inputs, {})); + + const Tensor& key = ctx->input(1); + OP_REQUIRES_OK(ctx, table->CheckKeyTensorForRemove(key)); + + int64 memory_used_before = 0; + if (ctx->track_allocations()) { + memory_used_before = table->MemoryUsed(); + } + OP_REQUIRES_OK(ctx, table->Remove(ctx, key)); + if (ctx->track_allocations()) { + ctx->record_persistent_memory_allocation(table->MemoryUsed() - + memory_used_before); + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("LookupTableRemoveV2").Device(DEVICE_CPU), + LookupTableRemoveOp); + // Op that returns the size of the given table. 
class LookupTableSizeOp : public OpKernel { public: diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index cfb1055d3c..415e15b720 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -30320,6 +30320,22 @@ op { } is_stateful: true } +op { + name: "LookupTableRemoveV2" + input_arg { + name: "table_handle" + type: DT_RESOURCE + } + input_arg { + name: "keys" + type_attr: "Tin" + } + attr { + name: "Tin" + type: "type" + } + is_stateful: true +} op { name: "LookupTableSize" input_arg { @@ -36706,6 +36722,10 @@ op { name: "empty_key" type_attr: "key_dtype" } + input_arg { + name: "deleted_key" + type_attr: "key_dtype" + } output_arg { name: "table_handle" type: DT_RESOURCE diff --git a/tensorflow/core/ops/lookup_ops.cc b/tensorflow/core/ops/lookup_ops.cc index 72a77be70d..a0987cd982 100644 --- a/tensorflow/core/ops/lookup_ops.cc +++ b/tensorflow/core/ops/lookup_ops.cc @@ -214,6 +214,19 @@ REGISTER_OP("LookupTableInsertV2") return Status::OK(); }); +REGISTER_OP("LookupTableRemoveV2") + .Input("table_handle: resource") + .Input("keys: Tin") + .Attr("Tin: type") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle handle; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &handle)); + + // TODO(turboale): Validate keys shape. 
+ return Status::OK(); + }); + REGISTER_OP("LookupTableSize") .Input("table_handle: Ref(string)") .Output("size: int64") @@ -407,6 +420,7 @@ REGISTER_OP("MutableDenseHashTable") REGISTER_OP("MutableDenseHashTableV2") .Input("empty_key: key_dtype") + .Input("deleted_key: key_dtype") .Output("table_handle: resource") .Attr("container: string = ''") .Attr("shared_name: string = ''") -- GitLab From d78c747e9177fc93d43a580acef2b62eb1420859 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Tue, 9 Oct 2018 16:39:33 -0700 Subject: [PATCH 160/411] Make lite_test.py run in open source. PiperOrigin-RevId: 216445964 --- tensorflow/contrib/lite/python/BUILD | 2 -- tensorflow/contrib/lite/python/lite_test.py | 14 +++++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 916788f215..be6c44d306 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -73,7 +73,6 @@ py_test( data = ["@tflite_mobilenet_ssd_quant_protobuf//:tflite_graph.pb"], srcs_version = "PY2AND3", tags = [ - "no_oss", "no_windows", ], deps = [ @@ -172,7 +171,6 @@ py_test( srcs = ["convert_saved_model_test.py"], srcs_version = "PY2AND3", tags = [ - "no_oss", "no_windows", ], visibility = ["//visibility:public"], diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py index d243a494f6..ef9bbded2a 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -591,11 +591,19 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase): 'Unable to parse input file \'{}\'.'.format(graph_def_file), str(error.exception)) - # TODO(nupurgarg): Test model loading in open source. def _initObjectDetectionArgs(self): # Initializes the arguments required for the object detection model. 
- self._graph_def_file = resource_loader.get_path_to_datafile( - 'testdata/tflite_graph.pb') + # Looks for the model file which is saved in a different location interally + # and externally. + filename = resource_loader.get_path_to_datafile('testdata/tflite_graph.pb') + if not os.path.exists(filename): + filename = os.path.join( + resource_loader.get_root_dir_with_all_resources(), + '../tflite_mobilenet_ssd_quant_protobuf/tflite_graph.pb') + if not os.path.exists(filename): + raise IOError("File '{0}' does not exist.".format(filename)) + + self._graph_def_file = filename self._input_arrays = ['normalized_input_image_tensor'] self._output_arrays = [ 'TFLite_Detection_PostProcess', 'TFLite_Detection_PostProcess:1', -- GitLab From ef9d2e7be9ae9fbcd4720d46e1f8a8cac902a1cd Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 9 Oct 2018 16:44:25 -0700 Subject: [PATCH 161/411] Remove the deprecated created and IS_LOCAL abstractions from activity analysis. PiperOrigin-RevId: 216446750 --- tensorflow/python/autograph/pyct/anno.py | 2 - .../pyct/static_analysis/activity.py | 82 ++---- .../pyct/static_analysis/activity_test.py | 268 +++++++----------- .../pyct/static_analysis/live_values.py | 5 +- 4 files changed, 121 insertions(+), 236 deletions(-) diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py index 5392e6ea03..e1f4af46cd 100644 --- a/tensorflow/python/autograph/pyct/anno.py +++ b/tensorflow/python/autograph/pyct/anno.py @@ -63,10 +63,8 @@ class Static(NoValue): The enum values are used strictly for documentation purposes. """ - # Deprecated - use reaching definitions instead. # Symbols # These flags are boolean. - IS_LOCAL = 'Symbol is local to the function scope being analyzed.' IS_PARAM = 'Symbol is a parameter to the function being analyzed.' 
# Scopes diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py index 086eda7574..cc159031ff 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py @@ -44,7 +44,6 @@ class Scope(object): Attributes: modified: identifiers modified in this scope - created: identifiers created in this scope used: identifiers referenced in this scope """ @@ -54,7 +53,8 @@ class Scope(object): Args: parent: A Scope or None. isolated: Whether the scope is isolated, that is, whether variables - created in this scope should be visible to the parent scope. + modified in this scope should be considered modified in the parent + scope. add_unknown_symbols: Whether to handle attributed and subscripts without having first seen the base name. E.g., analyzing the statement 'x.y = z' without first having seen 'x'. @@ -63,13 +63,11 @@ class Scope(object): self.parent = parent self.add_unknown_symbols = add_unknown_symbols self.modified = set() - # TODO(mdan): Completely remove this. 
- self.created = set() self.used = set() self.params = {} self.returned = set() - # TODO(mdan): Rename to `locals` + # TODO(mdan): Rename to `reserved` @property def referenced(self): if not self.isolated and self.parent is not None: @@ -77,8 +75,7 @@ class Scope(object): return self.used def __repr__(self): - return 'Scope{r=%s, c=%s, w=%s}' % (tuple(self.used), tuple(self.created), - tuple(self.modified)) + return 'Scope{r=%s, w=%s}' % (tuple(self.used), tuple(self.modified)) def copy_from(self, other): """Recursively copies the contents of this scope from another scope.""" @@ -88,7 +85,6 @@ class Scope(object): self.parent.copy_from(other.parent) self.isolated = other.isolated self.modified = copy.copy(other.modified) - self.created = copy.copy(other.created) self.used = copy.copy(other.used) self.params = copy.copy(other.params) self.returned = copy.copy(other.returned) @@ -109,56 +105,28 @@ class Scope(object): if other.parent is not None: self.parent.merge_from(other.parent) self.modified |= other.modified - self.created |= other.created self.used |= other.used self.params.update(other.params) self.returned |= other.returned - def has(self, name): - if name in self.modified: - return True - elif self.parent is not None: - return self.parent.has(name) - return False - def mark_read(self, name): self.used.add(name) - if self.parent is not None and name not in self.created: + if self.parent is not None and name not in self.params: self.parent.mark_read(name) + def mark_modified(self, name): + """Marks the given symbol as modified in the current scope.""" + self.modified.add(name) + if not self.isolated: + if self.parent is not None: + self.parent.mark_modified(name) + def mark_param(self, name, owner): # Assumption: all AST nodes have the same life span. This lets us use # a weak reference to mark the connection between a symbol node and the # function node whose argument that symbol is. 
self.params[name] = weakref.ref(owner) - def mark_creation(self, name, writes_create_symbol=False): - """Mark a qualified name as created.""" - if name.is_composite(): - parent = name.parent - if not writes_create_symbol: - return - else: - if not self.has(parent): - if self.add_unknown_symbols: - self.mark_read(parent) - else: - raise ValueError('Unknown symbol "%s".' % parent) - self.created.add(name) - - def mark_write(self, name): - """Marks the given symbol as modified in the current scope.""" - self.modified.add(name) - if self.isolated: - self.mark_creation(name) - else: - if self.parent is None: - self.mark_creation(name) - else: - if not self.parent.has(name): - self.mark_creation(name) - self.parent.mark_write(name) - def mark_returned(self, name): self.returned.add(name) if not self.isolated and self.parent is not None: @@ -197,10 +165,7 @@ class ActivityAnalyzer(transformer.Base): return True return False - def _track_symbol(self, - node, - composite_writes_alter_parent=False, - writes_create_symbol=False): + def _track_symbol(self, node, composite_writes_alter_parent=False): # A QN may be missing when we have an attribute (or subscript) on a function # call. Example: a().b if not anno.hasanno(node, anno.Basic.QN): @@ -208,11 +173,9 @@ class ActivityAnalyzer(transformer.Base): qn = anno.getanno(node, anno.Basic.QN) if isinstance(node.ctx, gast.Store): - self.scope.mark_write(qn) + self.scope.mark_modified(qn) if qn.is_composite and composite_writes_alter_parent: - self.scope.mark_write(qn.parent) - if writes_create_symbol: - self.scope.mark_creation(qn, writes_create_symbol=True) + self.scope.mark_modified(qn.parent) if self._in_aug_assign: self.scope.mark_read(qn) elif isinstance(node.ctx, gast.Load): @@ -220,13 +183,11 @@ class ActivityAnalyzer(transformer.Base): elif isinstance(node.ctx, gast.Param): # Param contexts appear in function defs, so they have the meaning of # defining a variable. 
- self.scope.mark_write(qn) + self.scope.mark_modified(qn) self.scope.mark_param(qn, self.enclosing_entities[-1]) else: raise ValueError('Unknown context %s for node %s.' % (type(node.ctx), qn)) - anno.setanno(node, NodeAnno.IS_LOCAL, self.scope.has(qn)) - if self._in_return_statement: self.scope.mark_returned(qn) @@ -243,6 +204,12 @@ class ActivityAnalyzer(transformer.Base): self._exit_scope() return node + def visit_nonlocal(self, node): + raise NotImplementedError() + + def visit_global(self, node): + raise NotImplementedError() + def visit_Expr(self, node): return self._process_statement(node) @@ -271,8 +238,7 @@ class ActivityAnalyzer(transformer.Base): def visit_Attribute(self, node): node = self.generic_visit(node) if self._in_constructor and self._node_sets_self_attribute(node): - self._track_symbol( - node, composite_writes_alter_parent=True, writes_create_symbol=True) + self._track_symbol(node, composite_writes_alter_parent=True) else: self._track_symbol(node) return node @@ -336,7 +302,7 @@ class ActivityAnalyzer(transformer.Base): # of its name, along with the usage of any decorator accompany it. 
self._enter_scope(False) node.decorator_list = self.visit_block(node.decorator_list) - self.scope.mark_write(qual_names.QN(node.name)) + self.scope.mark_modified(qual_names.QN(node.name)) anno.setanno(node, anno.Static.SCOPE, self.scope) self._exit_scope() diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py index d4a6ce8ac3..9a4f1bf09b 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py @@ -32,62 +32,63 @@ from tensorflow.python.platform import test class ScopeTest(test.TestCase): + def assertMissing(self, qn, scope): + self.assertNotIn(qn, scope.used) + self.assertNotIn(qn, scope.modified) + + def assertReadOnly(self, qn, scope): + self.assertIn(qn, scope.used) + self.assertNotIn(qn, scope.modified) + + def assertWriteOnly(self, qn, scope): + self.assertNotIn(qn, scope.used) + self.assertIn(qn, scope.modified) + + def assertReadWrite(self, qn, scope): + self.assertIn(qn, scope.used) + self.assertIn(qn, scope.modified) + def test_basic(self): scope = activity.Scope(None) - self.assertFalse(scope.has(QN('foo'))) + self.assertMissing(QN('foo'), scope) scope.mark_read(QN('foo')) - self.assertFalse(scope.has(QN('foo'))) - - scope.mark_write(QN('foo')) - self.assertTrue(scope.has(QN('foo'))) + self.assertReadOnly(QN('foo'), scope) - scope.mark_read(QN('bar')) - self.assertFalse(scope.has(QN('bar'))) + scope.mark_modified(QN('foo')) + self.assertReadWrite(QN('foo'), scope) def test_copy_from(self): scope = activity.Scope(None) - scope.mark_write(QN('foo')) - + scope.mark_modified(QN('foo')) other = activity.Scope(None) other.copy_from(scope) - self.assertTrue(QN('foo') in other.modified) + self.assertWriteOnly(QN('foo'), other) - scope.mark_write(QN('bar')) + scope.mark_modified(QN('bar')) scope.copy_from(other) - self.assertFalse(QN('bar') in scope.modified) + 
self.assertMissing(QN('bar'), scope) - scope.mark_write(QN('bar')) + scope.mark_modified(QN('bar')) scope.merge_from(other) - self.assertTrue(QN('bar') in scope.modified) - self.assertFalse(QN('bar') in other.modified) + self.assertWriteOnly(QN('bar'), scope) + self.assertMissing(QN('bar'), other) def test_copy_of(self): scope = activity.Scope(None) scope.mark_read(QN('foo')) + other = activity.Scope.copy_of(scope) - self.assertTrue(QN('foo') in activity.Scope.copy_of(scope).used) + self.assertReadOnly(QN('foo'), other) child_scope = activity.Scope(scope) child_scope.mark_read(QN('bar')) + other = activity.Scope.copy_of(child_scope) - self.assertTrue(QN('bar') in activity.Scope.copy_of(child_scope).used) - - def test_nesting(self): - scope = activity.Scope(None) - scope.mark_write(QN('foo')) - scope.mark_read(QN('bar')) - - child = activity.Scope(scope) - self.assertTrue(child.has(QN('foo'))) - self.assertTrue(scope.has(QN('foo'))) - - child.mark_write(QN('bar')) - self.assertTrue(child.has(QN('bar'))) - self.assertFalse(scope.has(QN('bar'))) + self.assertReadOnly(QN('bar'), other) def test_referenced(self): scope = activity.Scope(None) @@ -123,25 +124,6 @@ class ActivityAnalyzerTest(test.TestCase): node = activity.resolve(node, entity_info) return node, entity_info - def test_local_markers(self): - - def test_fn(a): # pylint:disable=unused-argument - b = c # pylint:disable=undefined-variable - while b > 0: - b -= 1 - return b - - node, _ = self._parse_and_analyze(test_fn) - self.assertFalse( - anno.getanno(node.body[0].body[0].value, - NodeAnno.IS_LOCAL)) # c in b = c - self.assertTrue( - anno.getanno(node.body[0].body[1].test.left, - NodeAnno.IS_LOCAL)) # b in b > 0 - self.assertTrue( - anno.getanno(node.body[0].body[2].value, - NodeAnno.IS_LOCAL)) # b in return b - def assertSymbolSetsAre(self, expected, actual, name): expected = set(expected) actual = set(str(s) for s in actual) @@ -153,12 +135,10 @@ class ActivityAnalyzerTest(test.TestCase): ' Extra: %s\n' % 
(name.upper(), expected, actual, expected - actual, actual - expected)) - def assertScopeIsRmc(self, scope, used, modified, created): + def assertScopeIs(self, scope, used, modified): """Assert the scope contains specific used, modified & created variables.""" self.assertSymbolSetsAre(used, scope.used, 'read') self.assertSymbolSetsAre(modified, scope.modified, 'modified') - # Created is deprecated, we're no longer verifying it. - # self.assertSymbolSetsAre(created, scope.created, 'created') def test_print_statement(self): @@ -181,7 +161,7 @@ class ActivityAnalyzerTest(test.TestCase): print_args_scope = anno.getanno(print_node, NodeAnno.ARGS_SCOPE) # We basically need to detect which variables are captured by the call # arguments. - self.assertScopeIsRmc(print_args_scope, ('a', 'b'), (), ()) + self.assertScopeIs(print_args_scope, ('a', 'b'), ()) def test_call_args(self): @@ -195,8 +175,8 @@ class ActivityAnalyzerTest(test.TestCase): call_node = node.body[0].body[2].value # We basically need to detect which variables are captured by the call # arguments. 
- self.assertScopeIsRmc( - anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'b'), (), ()) + self.assertScopeIs( + anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'b'), ()) def test_call_args_attributes(self): @@ -210,12 +190,8 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) call_node = node.body[0].body[1].value - self.assertScopeIsRmc( - anno.getanno(call_node, NodeAnno.ARGS_SCOPE), - ('a', 'a.b', 'a.c'), - (), - (), - ) + self.assertScopeIs( + anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'a.b', 'a.c'), ()) def test_call_args_subscripts(self): @@ -230,12 +206,9 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) call_node = node.body[0].body[2].value - self.assertScopeIsRmc( + self.assertScopeIs( anno.getanno(call_node, NodeAnno.ARGS_SCOPE), - ('a', 'a[0]', 'a[b]', 'b'), - (), - (), - ) + ('a', 'a[0]', 'a[b]', 'b'), ()) def test_while(self): @@ -248,14 +221,13 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) while_node = node.body[0].body[1] - self.assertScopeIsRmc( - anno.getanno(while_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'), - ('c',)) - self.assertScopeIsRmc( + self.assertScopeIs( + anno.getanno(while_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c')) + self.assertScopeIs( anno.getanno(while_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'), - ('b', 'c'), ('a', 'b', 'c')) - self.assertScopeIsRmc( - anno.getanno(while_node, NodeAnno.COND_SCOPE), ('b',), (), ()) + ('b', 'c')) + self.assertScopeIs( + anno.getanno(while_node, NodeAnno.COND_SCOPE), ('b',), ()) def test_for(self): @@ -268,11 +240,11 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) for_node = node.body[0].body[1] - self.assertScopeIsRmc( - anno.getanno(for_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'), ('c',)) - self.assertScopeIsRmc( + self.assertScopeIs( + anno.getanno(for_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c')) + 
self.assertScopeIs( anno.getanno(for_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'), - ('b', 'c', '_'), ('a', 'b', 'c', '_')) + ('b', 'c', '_')) def test_if(self): @@ -289,18 +261,16 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) if_node = node.body[0].body[0] - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('x', 'y', 'z'), - ('y', 'z')) - # TODO(mdan): Double check: is it ok to not mark a local symbol as not read? - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('x', 'z', 'u'), - ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u')) - self.assertScopeIsRmc( + self.assertScopeIs( + anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('x', 'y', 'z')) + self.assertScopeIs( + anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('x', 'y', 'z', 'u'), + ('x', 'y', 'z', 'u')) + self.assertScopeIs( anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('x', 'y'), - ('x', 'y', 'u'), ('y', 'u')) - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent, ('x', 'z', 'u'), + ('x', 'y', 'u')) + self.assertScopeIs( + anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent, ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u')) def test_if_attributes(self): @@ -316,24 +286,14 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) if_node = node.body[0].body[0] - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.BODY_SCOPE), - ('a', 'a.c'), - ('a.b', 'd'), - ('d',), - ) - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), - ('a', 'a.c'), - ('a.b', 'd'), - ('d',), - ) - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, - ('a', 'a.c', 'd'), - ('a.b', 'd'), - ('a', 'd'), - ) + self.assertScopeIs( + anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('a', 'a.c'), ('a.b', 'd')) + self.assertScopeIs( + anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('a', 'a.c'), + ('a.b', 'd')) + self.assertScopeIs( + 
anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('a', 'a.c', 'd'), + ('a.b', 'd')) def test_if_subscripts(self): @@ -348,25 +308,15 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) if_node = node.body[0].body[0] - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.BODY_SCOPE), - ('a', 'b', 'c', 'a[c]'), - ('a[b]', 'd'), - ('d',), - ) + self.assertScopeIs( + anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('a', 'b', 'c', 'a[c]'), + ('a[b]', 'd')) # TODO(mdan): Should subscript writes (a[0] = 1) be considered to read "a"? - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), - ('a', 'e'), - ('a[0]', 'd'), - ('d',), - ) - self.assertScopeIsRmc( + self.assertScopeIs( + anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('a', 'e'), ('a[0]', 'd')) + self.assertScopeIs( anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent, - ('a', 'b', 'c', 'd', 'e', 'a[c]'), - ('d', 'a[b]', 'a[0]'), - ('a', 'b', 'c', 'd', 'e'), - ) + ('a', 'b', 'c', 'd', 'e', 'a[c]'), ('d', 'a[b]', 'a[0]')) def test_nested_if(self): @@ -380,12 +330,10 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) inner_if_node = node.body[0].body[0].body[0] - self.assertScopeIsRmc( - anno.getanno(inner_if_node, NodeAnno.BODY_SCOPE), ('b',), ('a',), - ('a',)) - self.assertScopeIsRmc( - anno.getanno(inner_if_node, NodeAnno.ORELSE_SCOPE), ('b',), ('a',), - ('a',)) + self.assertScopeIs( + anno.getanno(inner_if_node, NodeAnno.BODY_SCOPE), ('b',), ('a',)) + self.assertScopeIs( + anno.getanno(inner_if_node, NodeAnno.ORELSE_SCOPE), ('b',), ('a',)) def test_nested_function(self): @@ -404,11 +352,8 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) fn_def_node = node.body[0].body[0] - self.assertScopeIsRmc( - anno.getanno(fn_def_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',), ( - 'x', - 'y', - )) + self.assertScopeIs( + anno.getanno(fn_def_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',)) def 
test_constructor_attributes(self): @@ -420,12 +365,9 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(TestClass) init_node = node.body[0].body[0] - self.assertScopeIsRmc( - anno.getanno(init_node, NodeAnno.BODY_SCOPE), - ('self', 'a', 'self.b'), - ('self', 'self.b', 'self.b.c'), - ('self', 'a', 'self.b'), - ) + self.assertScopeIs( + anno.getanno(init_node, NodeAnno.BODY_SCOPE), ('self', 'a', 'self.b'), + ('self', 'self.b', 'self.b.c')) def test_aug_assign_subscripts(self): @@ -434,12 +376,8 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) fn_node = node.body[0] - self.assertScopeIsRmc( - anno.getanno(fn_node, NodeAnno.BODY_SCOPE), - ('a', 'a[0]'), - ('a[0]',), - ('a',), - ) + self.assertScopeIs( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('a', 'a[0]'), ('a[0]',)) def test_return_vars_are_read(self): @@ -448,16 +386,7 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) fn_node = node.body[0] - self.assertScopeIsRmc( - anno.getanno(fn_node, NodeAnno.BODY_SCOPE), - ('c',), - (), - ( - 'a', - 'b', - 'c', - ), - ) + self.assertScopeIs(anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('c',), ()) def test_aug_assign(self): @@ -466,12 +395,8 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) fn_node = node.body[0] - self.assertScopeIsRmc( - anno.getanno(fn_node, NodeAnno.BODY_SCOPE), - ('a', 'b'), - ('a'), - ('a', 'b'), - ) + self.assertScopeIs( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('a', 'b'), ('a')) def test_aug_assign_rvalues(self): @@ -485,23 +410,22 @@ class ActivityAnalyzerTest(test.TestCase): node, _ = self._parse_and_analyze(test_fn) fn_node = node.body[0] - self.assertScopeIsRmc( - anno.getanno(fn_node, NodeAnno.BODY_SCOPE), - ('foo', 'x'), - (), - ('x',), - ) + self.assertScopeIs( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('foo', 'x'), ()) - def test_params_created(self): + def test_params(self): def 
test_fn(a, b): # pylint: disable=unused-argument return b node, _ = self._parse_and_analyze(test_fn) fn_node = node.body[0] - self.assertScopeIsRmc( - anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('b',), (('')), - (('a', 'b'))) + body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE) + self.assertScopeIs(body_scope, ('b',), ()) + self.assertScopeIs(body_scope.parent, ('b',), ('a', 'b')) + + args_scope = anno.getanno(fn_node.args, anno.Static.SCOPE) + self.assertSymbolSetsAre(('a', 'b'), args_scope.params.keys(), 'params') if __name__ == '__main__': diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py index 4ceddce53b..dc363f9a47 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py +++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py @@ -28,7 +28,6 @@ import six from tensorflow.python.autograph.pyct import anno from tensorflow.python.autograph.pyct import transformer -from tensorflow.python.autograph.pyct.static_analysis.annos import NodeAnno # TODO(aqj): Do we need this? Do other builtins fail in similar ways @@ -133,11 +132,9 @@ class LiveValueResolver(transformer.Base): anno.setanno(node, 'fqn', anno.getanno(node.value, 'type_fqn') + (node.attr,)) elif isinstance(node.value, gast.Name): - stem_name = node.value - # All nonlocal symbols should be fully resolved. - assert anno.hasanno(stem_name, NodeAnno.IS_LOCAL), stem_name # TODO(mdan): Figure out what to do when calling attribute on local object # Maybe just leave as-is? + pass return node -- GitLab From c770568935b85d506dc1a1f671822a7e122b5056 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 16:48:34 -0700 Subject: [PATCH 162/411] Internal change. 
PiperOrigin-RevId: 216447412 --- .../contrib/lite/kernels/sparse_output_fully_connected.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc index 843ed0768c..226bba2d47 100644 --- a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc @@ -88,6 +88,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const bool is_hybrid_op = (weights->type == kTfLiteUInt8 && input->type == kTfLiteFloat32); + // Resize output. + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(1); + output_size_array->data[0] = 1; + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, output, output_size_array)); + if (is_hybrid_op) { TfLiteIntArrayFree(node->temporaries); node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors); -- GitLab From d4526cf9d1d58cbe480e7d2b8199620e0e9f0572 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 16:52:15 -0700 Subject: [PATCH 163/411] [XLA] Added xla::CreateModuleFromProto(...) combining loading module from proto and verifying it with HloVerifier. 
PiperOrigin-RevId: 216447947 --- tensorflow/compiler/xla/layout_util.cc | 2 +- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_instruction.cc | 14 ++- .../compiler/xla/service/hlo_proto_util.cc | 12 ++ .../compiler/xla/service/hlo_proto_util.h | 6 + .../compiler/xla/service/hlo_verifier.cc | 104 +++++++++++++++++- 6 files changed, 132 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index 3c8db9aa45..19667b7ed9 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -205,7 +205,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) { return Status::OK(); } - if (layout.format() == INVALID_FORMAT) { + if (layout.format() == INVALID_FORMAT || !Format_IsValid(layout.format())) { return InvalidArgument( "Layout does not have a valid format: layout {%s}, shape {%s}", layout.ShortDebugString(), shape.ShortDebugString()); diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 2b292ed053..f9f741aaee 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -3127,6 +3127,7 @@ cc_library( ":buffer_assignment", ":hlo", ":hlo_proto", + ":hlo_verifier", "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:util", ], diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 050d28b289..09bcf8a9e7 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -305,6 +305,9 @@ StatusOr> HloInstruction::CreateFromProto( proto.tuple_index()); break; case HloOpcode::kReducePrecision: + TF_RET_CHECK(proto.operand_ids_size() == 1) + << "ReducePrecision instruction should have 1 operand but sees " + << proto.operand_ids_size(); instruction = CreateReducePrecision(proto.shape(), operands(0), proto.exponent_bits(), proto.mantissa_bits()); 
@@ -312,12 +315,16 @@ StatusOr> HloInstruction::CreateFromProto( case HloOpcode::kInfeed: { const Shape& data_shape = ShapeUtil::GetTupleElementShape(proto.shape(), 0); - TF_RET_CHECK(proto.operand_ids_size() == 1); + TF_RET_CHECK(proto.operand_ids_size() == 1) + << "Infeed instruction should have 1 operand but sees " + << proto.operand_ids_size(); instruction = CreateInfeed(data_shape, operands(0), proto.infeed_config()); } break; case HloOpcode::kOutfeed: - TF_RET_CHECK(proto.operand_ids_size() == 2); + TF_RET_CHECK(proto.operand_ids_size() == 2) + << "Outfeed instruction should have 2 operands but sees " + << proto.operand_ids_size(); TF_RETURN_IF_ERROR( ShapeUtil::ValidateShapeWithOptionalLayout(proto.outfeed_shape())); instruction = CreateOutfeed(proto.outfeed_shape(), operands(0), @@ -349,6 +356,9 @@ StatusOr> HloInstruction::CreateFromProto( break; } case HloOpcode::kCollectivePermute: { + TF_RET_CHECK(proto.operand_ids_size() == 1) + << "CollectivePermute instruction should have 1 operand but sees " + << proto.operand_ids_size(); std::vector> source_target_pairs( proto.source_target_pairs_size()); for (int i = 0; i < source_target_pairs.size(); i++) { diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc index b9c0b0c4ee..026a0e8fba 100644 --- a/tensorflow/compiler/xla/service/hlo_proto_util.cc +++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/compiler/xla/service/hlo_proto_util.h" +#include "tensorflow/compiler/xla/service/hlo_verifier.h" #include @@ -36,6 +37,17 @@ HloProto MakeHloProto(const HloModule& module) { return proto; } +StatusOr> CreateModuleFromProto( + const HloModuleProto& proto, const HloModuleConfig& module_config) { + TF_ASSIGN_OR_RETURN(std::unique_ptr module, + HloModule::CreateFromProto(proto, module_config)); + TF_RETURN_IF_ERROR( + HloVerifier(/*layout_sensitive=*/true, /*allow_mixed_precision=*/false) + .Run(module.get()) + .status()); + return std::move(module); +} + StatusOr> EntryComputationParameterShapes( const HloProto& hlo_proto) { if (!hlo_proto.has_hlo_module()) { diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.h b/tensorflow/compiler/xla/service/hlo_proto_util.h index 3d9c375cd5..1db82dd6fc 100644 --- a/tensorflow/compiler/xla/service/hlo_proto_util.h +++ b/tensorflow/compiler/xla/service/hlo_proto_util.h @@ -35,6 +35,12 @@ HloProto MakeHloProto(const HloModule& module, // will not be included in the output. HloProto MakeHloProto(const HloModule& module); +// Create an HLO state from serialized representation. In addition to +// creating the proto with HloModule::CreateFromProto(...) it also +// uses HloVerifier to ensure basic invariants are held. +StatusOr> CreateModuleFromProto( + const HloModuleProto& proto, const HloModuleConfig& module_config); + // Returns the shapes of the parameters of the entry computation. Shape pointers // refer to shapes inside of the given HloProto. StatusOr> EntryComputationParameterShapes( diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index be3bee5975..620458855f 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -27,6 +27,15 @@ limitations under the License. 
namespace xla { +static Status CheckOperandCount(const HloInstruction* hlo, int expected) { + if (hlo->operand_count() != expected) { + return InternalError("Expected %d operands for %s instruction: %s", + expected, HloOpcodeString(hlo->opcode()), + hlo->ToString()); + } + return Status::OK(); +} + Status ShapeVerifier::HandleElementwiseUnary(HloInstruction* hlo) { return CheckUnaryShape(hlo); } @@ -58,12 +67,14 @@ Status ShapeVerifier::HandleConcatenate(HloInstruction* concatenate) { } Status ShapeVerifier::HandleConvert(HloInstruction* convert) { + TF_RETURN_IF_ERROR(CheckOperandCount(convert, 1)); return CheckShape(convert, ShapeInference::InferConvertShape( convert->operand(0)->shape(), convert->shape().element_type())); } Status ShapeVerifier::HandleBitcastConvert(HloInstruction* convert) { + TF_RETURN_IF_ERROR(CheckOperandCount(convert, 1)); return CheckShape(convert, ShapeInference::InferBitcastConvertShape( convert->operand(0)->shape(), convert->shape().element_type())); @@ -74,6 +85,7 @@ Status ShapeVerifier::HandleCopy(HloInstruction* copy) { } Status ShapeVerifier::HandleDot(HloInstruction* dot) { + TF_RETURN_IF_ERROR(CheckOperandCount(dot, 2)); TF_ASSIGN_OR_RETURN(const Shape expected, ShapeInference::InferDotOpShape( dot->operand(0)->shape(), dot->operand(1)->shape(), @@ -82,6 +94,7 @@ Status ShapeVerifier::HandleDot(HloInstruction* dot) { } Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) { + TF_RETURN_IF_ERROR(CheckOperandCount(convolution, 2)); TF_ASSIGN_OR_RETURN( const Shape expected, ShapeInference::InferConvolveShape( @@ -92,6 +105,7 @@ Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) { } Status ShapeVerifier::HandleFft(HloInstruction* fft) { + TF_RETURN_IF_ERROR(CheckOperandCount(fft, 1)); TF_ASSIGN_OR_RETURN( const Shape expected, ShapeInference::InferFftShape(fft->operand(0)->shape(), fft->fft_type(), @@ -118,11 +132,13 @@ Status ShapeVerifier::HandleAllToAll(HloInstruction* hlo) { } Status 
ShapeVerifier::HandleCollectivePermute(HloInstruction* hlo) { + TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 1)); return CheckShape(hlo, ShapeInference::InferCollectivePermuteShape( hlo->operand(0)->shape())); } Status ShapeVerifier::HandleReducePrecision(HloInstruction* reduce_precision) { + TF_RETURN_IF_ERROR(CheckOperandCount(reduce_precision, 1)); return CheckShape(reduce_precision, ShapeInference::InferReducePrecisionShape( reduce_precision->operand(0)->shape(), reduce_precision->exponent_bits(), @@ -156,6 +172,7 @@ Status ShapeVerifier::CheckOperandAndParameter( } Status ShapeVerifier::HandleInfeed(HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 1)); HloInfeedInstruction* infeed = Cast(instruction); TF_RETURN_IF_ERROR(CheckIsTokenOperand(instruction, 0)); @@ -166,6 +183,7 @@ Status ShapeVerifier::HandleInfeed(HloInstruction* instruction) { } Status ShapeVerifier::HandleOutfeed(HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2)); HloOutfeedInstruction* outfeed = Cast(instruction); TF_RETURN_IF_ERROR(CheckIsTokenOperand(instruction, 1)); @@ -192,10 +210,7 @@ bool ShapeVerifier::HasCompatibleElementTypes(const Shape& shape_0, } Status ShapeVerifier::HandleRng(HloInstruction* instruction) { - if (instruction->operand_count() != 2) { - return InternalError("Expected two operands for Rng instruction: %s", - instruction->ToString()); - } + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2)); const Shape& shape_0 = instruction->operand(0)->shape(); const Shape& shape_1 = instruction->operand(1)->shape(); @@ -244,12 +259,17 @@ Status ShapeVerifier::HandleRng(HloInstruction* instruction) { } Status ShapeVerifier::HandleReverse(HloInstruction* reverse) { + TF_RETURN_IF_ERROR(CheckOperandCount(reverse, 1)); return CheckShape( reverse, ShapeInference::InferReverseShape(reverse->operand(0)->shape(), reverse->dimensions())); } Status ShapeVerifier::HandleSort(HloInstruction* sort) { + if 
(sort->operand_count() < 1 || sort->operand_count() > 2) { + return InternalError("Expected 1 or 2 operands for %s instruction: %s", + HloOpcodeString(sort->opcode()), sort->ToString()); + } if (sort->operand_count() == 2 && !ShapeUtil::SameDimensions(sort->operand(0)->shape(), sort->operand(1)->shape())) { @@ -263,10 +283,12 @@ Status ShapeVerifier::HandleSort(HloInstruction* sort) { } Status ShapeVerifier::HandleConstant(HloInstruction* constant) { + TF_RETURN_IF_ERROR(CheckOperandCount(constant, 0)); return CheckShape(constant, constant->literal().shape()); } Status ShapeVerifier::HandleIota(HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 0)); auto* iota = Cast(instruction); const int64 rank = ShapeUtil::Rank(iota->shape()); if (rank == 0) { @@ -281,6 +303,7 @@ Status ShapeVerifier::HandleIota(HloInstruction* instruction) { } Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) { + TF_RETURN_IF_ERROR(CheckOperandCount(get_tuple_element, 1)); return CheckShape(get_tuple_element, ShapeInference::InferGetTupleElementShape( get_tuple_element->operand(0)->shape(), @@ -288,6 +311,12 @@ Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) { } Status ShapeVerifier::HandleReduce(HloInstruction* reduce) { + if (reduce->operand_count() % 2 != 0) { + return InternalError( + "Expected an even number of operands for %s instruction: %s", + HloOpcodeString(reduce->opcode()), reduce->ToString()); + } + std::vector operand_shapes; for (const HloInstruction* operand : reduce->operands()) { operand_shapes.push_back(&operand->shape()); @@ -298,10 +327,12 @@ Status ShapeVerifier::HandleReduce(HloInstruction* reduce) { } Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) { + TF_RETURN_IF_ERROR(CheckOperandCount(bitcast, 1)); return Status::OK(); } Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) { + TF_RETURN_IF_ERROR(CheckOperandCount(broadcast, 1)); // HLO broadcast has 
no exact analog at the proto level so there is no // ShapeInference method. Check the output shape explicitly. const Shape& operand_shape = broadcast->operand(0)->shape(); @@ -322,6 +353,7 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) { } Status ShapeVerifier::HandleReshape(HloInstruction* reshape) { + TF_RETURN_IF_ERROR(CheckOperandCount(reshape, 1)); // Check for mixed precision. TF_RETURN_IF_ERROR(CheckShape(reshape, reshape->shape())); TF_RET_CHECK(ShapeUtil::ElementsIn(reshape->shape()) == @@ -330,12 +362,14 @@ Status ShapeVerifier::HandleReshape(HloInstruction* reshape) { } Status ShapeVerifier::HandleTranspose(HloInstruction* transpose) { + TF_RETURN_IF_ERROR(CheckOperandCount(transpose, 1)); return CheckShape( transpose, ShapeInference::InferTransposeShape( transpose->operand(0)->shape(), transpose->dimensions())); } Status ShapeVerifier::HandleParameter(HloInstruction* hlo) { + TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 0)); return Status::OK(); } @@ -383,6 +417,7 @@ Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) { } Status ShapeVerifier::HandleSlice(HloInstruction* slice) { + TF_RETURN_IF_ERROR(CheckOperandCount(slice, 1)); return CheckShape(slice, ShapeInference::InferSliceShape( slice->operand(0)->shape(), slice->slice_starts(), @@ -390,6 +425,7 @@ Status ShapeVerifier::HandleSlice(HloInstruction* slice) { } Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) { + TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_slice, 2)); return CheckShape(dynamic_slice, ShapeInference::InferDynamicSliceShape( dynamic_slice->operand(0)->shape(), dynamic_slice->operand(1)->shape(), @@ -398,6 +434,7 @@ Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) { Status ShapeVerifier::HandleDynamicUpdateSlice( HloInstruction* dynamic_update_slice) { + TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_update_slice, 3)); return CheckShape(dynamic_update_slice, ShapeInference::InferDynamicUpdateSliceShape( 
dynamic_update_slice->operand(0)->shape(), @@ -427,6 +464,7 @@ Status ShapeVerifier::HandleMap(HloInstruction* map) { } Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) { + TF_RETURN_IF_ERROR(CheckOperandCount(reduce_window, 2)); return CheckShape( reduce_window, ShapeInference::InferReduceWindowShape( @@ -436,6 +474,7 @@ Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) { } Status ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 3)); return CheckShape( instruction, ShapeInference::InferSelectAndScatterShape( @@ -446,6 +485,7 @@ Status ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) { } Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) { + TF_RETURN_IF_ERROR(CheckOperandCount(xla_while, 1)); TF_RETURN_IF_ERROR( CheckOperandAndParameter(xla_while, 0, xla_while->while_body(), 0)); TF_RETURN_IF_ERROR( @@ -465,6 +505,7 @@ Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) { } Status ShapeVerifier::HandleConditional(HloInstruction* conditional) { + TF_RETURN_IF_ERROR(CheckOperandCount(conditional, 3)); TF_RETURN_IF_ERROR(CheckOperandAndParameter( conditional, 1, conditional->true_computation(), 0)); TF_RETURN_IF_ERROR(CheckOperandAndParameter( @@ -479,12 +520,14 @@ Status ShapeVerifier::HandleConditional(HloInstruction* conditional) { } Status ShapeVerifier::HandlePad(HloInstruction* pad) { + TF_RETURN_IF_ERROR(CheckOperandCount(pad, 2)); return CheckShape(pad, ShapeInference::InferPadShape(pad->operand(0)->shape(), pad->operand(1)->shape(), pad->padding_config())); } Status ShapeVerifier::HandleSend(HloInstruction* send) { + TF_RETURN_IF_ERROR(CheckOperandCount(send, 2)); return CheckShape(send, ShapeUtil::MakeTupleShape({send->operand(0)->shape(), ShapeUtil::MakeShape(U32, {}), @@ -492,10 +535,12 @@ Status ShapeVerifier::HandleSend(HloInstruction* send) { } Status ShapeVerifier::HandleSendDone(HloInstruction* 
send_done) { + TF_RETURN_IF_ERROR(CheckOperandCount(send_done, 1)); return CheckShape(send_done, ShapeUtil::MakeTokenShape()); } Status ShapeVerifier::HandleRecv(HloInstruction* recv) { + TF_RETURN_IF_ERROR(CheckOperandCount(recv, 1)); return CheckShape( recv, ShapeUtil::MakeTupleShape( {ShapeUtil::GetTupleElementShape(recv->shape(), 0), @@ -503,6 +548,7 @@ Status ShapeVerifier::HandleRecv(HloInstruction* recv) { } Status ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) { + TF_RETURN_IF_ERROR(CheckOperandCount(recv_done, 1)); return CheckShape( recv_done, ShapeUtil::MakeTupleShape( @@ -512,6 +558,7 @@ Status ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) { Status ShapeVerifier::HandleBatchNormTraining( HloInstruction* batch_norm_training) { + TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_training, 3)); return CheckShape(batch_norm_training, ShapeInference::InferBatchNormTrainingShape( batch_norm_training->operand(0)->shape(), @@ -522,6 +569,7 @@ Status ShapeVerifier::HandleBatchNormTraining( Status ShapeVerifier::HandleBatchNormInference( HloInstruction* batch_norm_inference) { + TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_inference, 5)); return CheckShape(batch_norm_inference, ShapeInference::InferBatchNormInferenceShape( batch_norm_inference->operand(0)->shape(), @@ -533,6 +581,7 @@ Status ShapeVerifier::HandleBatchNormInference( } Status ShapeVerifier::HandleBatchNormGrad(HloInstruction* batch_norm_grad) { + TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_grad, 5)); return CheckShape(batch_norm_grad, ShapeInference::InferBatchNormGradShape( batch_norm_grad->operand(0)->shape(), batch_norm_grad->operand(1)->shape(), @@ -601,6 +650,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) { } // namespace Status ShapeVerifier::HandleGather(HloInstruction* gather) { + TF_RETURN_IF_ERROR(CheckOperandCount(gather, 2)); return CheckShape( gather, ShapeInference::InferGatherShape( @@ -609,6 +659,7 @@ Status 
ShapeVerifier::HandleGather(HloInstruction* gather) { } Status ShapeVerifier::HandleScatter(HloInstruction* scatter) { + TF_RETURN_IF_ERROR(CheckOperandCount(scatter, 3)); return CheckShape( scatter, ShapeInference::InferScatterShape( scatter->operand(0)->shape(), scatter->operand(1)->shape(), @@ -696,12 +747,14 @@ Status ShapeVerifier::CheckShape(const HloInstruction* instruction, } Status ShapeVerifier::CheckUnaryShape(const HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 1)); return CheckShape(instruction, ShapeInference::InferUnaryOpShape(instruction->opcode(), instruction->operand(0))); } Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2)); return CheckShape( instruction, ShapeInference::InferBinaryOpShape(instruction->opcode(), instruction->operand(0), @@ -709,6 +762,7 @@ Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) { } Status ShapeVerifier::CheckTernaryShape(const HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 3)); return CheckShape(instruction, ShapeInference::InferTernaryOpShape( instruction->opcode(), instruction->operand(0), @@ -816,6 +870,47 @@ Status VerifyEntryAndExitShapes(const HloModule& module) { return Status::OK(); } +// Verifies that entry computation layout matches characteristics of +// entry computation. +Status CheckEntryComputationLayout(const HloModule& module) { + const HloComputation* computation = module.entry_computation(); + const auto& layout = module.entry_computation_layout(); + + // TODO(117498192): Change into a call to Compatible(...). 
+ if (!ShapeUtil::CompatibleIgnoringFpPrecision( + computation->root_instruction()->shape(), + layout.result_layout().shape())) { + return InternalError( + "Shape of the root instruction of entry computation (%s) should be " + "compatible to one specified in module's entry computation layout (%s)", + ShapeUtil::HumanString(computation->root_instruction()->shape()), + ShapeUtil::HumanString(layout.result_layout().shape())); + } + + if (computation->num_parameters() != layout.parameter_count()) { + return InternalError( + "Number of parameters in entry computation layout (%d) must be same " + "as number of parameters of entry computation computation (%d)", + layout.parameter_count(), computation->num_parameters()); + } + + for (int i = 0; i < computation->num_parameters(); ++i) { + if (!ShapeUtil::Compatible(computation->parameter_instruction(i)->shape(), + layout.parameter_shape(i))) { + return InternalError( + "Shape of the entry computation parameter %d is %s should be " + "compatible to the one specified in module's entry computation " + "layout %s", + i, + ShapeUtil::HumanString( + computation->parameter_instruction(i)->shape()), + ShapeUtil::HumanString(layout.parameter_shape(i))); + } + } + + return Status::OK(); +} + // Checks if the given two instructions share the same channel id. Status CheckSameChannel(const HloInstruction* instr1, const HloInstruction* instr2) { @@ -1213,6 +1308,7 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier)); } + TF_RETURN_IF_ERROR(CheckEntryComputationLayout(*module)); TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module)); // If the module has a schedule, it must be valid. -- GitLab From 65b7d0b2f84c334327a295bf41bc06c7f6b8ffe5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 9 Oct 2018 16:52:56 -0700 Subject: [PATCH 164/411] [XLA:GPU] Elide the SequentialThunk when emitting scatter with no copy We have a 1-element thunk sequence if we're not copying. 
That's still two thunks and hlo profiling gets confused if it sees two thunks for the same instruction and one of them claims to be the whole instruction. PiperOrigin-RevId: 216448063 --- .../xla/service/gpu/ir_emitter_unnested.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index bef7a55301..09486d291a 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -2080,9 +2080,9 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { // Launch a kernel that reads every element in the updates tensor. We could // also do one kernel per window instead if bounds checks turn out to be a // bottleneck. - thunks.push_back(BuildKernelThunk( - scatter, - /*implements_whole_instruction=*/operand_buffer == destination_buffer)); + thunks.push_back( + BuildKernelThunk(scatter, + /*implements_whole_instruction=*/thunks.empty())); LaunchDimensions launch_dimensions = CalculateLaunchDimensions( updates->shape(), ir_emitter_context_->device_description()); @@ -2090,8 +2090,12 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { static_cast(thunks.back().get()), ir_emitter_context_->llvm_module()); - thunk_sequence_->emplace_back( - absl::make_unique(std::move(thunks), scatter)); + if (thunks.size() == 1) { + thunk_sequence_->push_back(std::move(thunks[0])); + } else { + thunk_sequence_->emplace_back( + absl::make_unique(std::move(thunks), scatter)); + } return ParallelLoopEmitter(loop_body_emitter, updates->shape(), launch_dimensions, &b_) .EmitLoop(IrName(scatter), -- GitLab From bb5fc614a4a358b350ef8dd19cb7010760fa9b29 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 16:53:29 -0700 Subject: [PATCH 165/411] [XLA] Cleanup: Make AllocationTracker::Resolve const. 
So that when resolving some global data, we don't have to worry whether "Resolve" is going to mutate the real data. PiperOrigin-RevId: 216448145 --- tensorflow/compiler/xla/service/allocation_tracker.cc | 6 +++--- tensorflow/compiler/xla/service/allocation_tracker.h | 8 ++++---- tensorflow/compiler/xla/service/service.cc | 4 ++-- tensorflow/compiler/xla/service/service.h | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc index 1ed6142dce..ef5e211646 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.cc +++ b/tensorflow/compiler/xla/service/allocation_tracker.cc @@ -176,13 +176,13 @@ StatusOr> AllocationTracker::DeconstructTuple( } StatusOr> AllocationTracker::Resolve( - const GlobalDataHandle& data) { + const GlobalDataHandle& data) const { tensorflow::mutex_lock lock(mutex_); return AllocationTracker::ResolveInternal(data); } StatusOr AllocationTracker::ResolveForReplica( - const GlobalDataHandle& data, int replica_id) { + const GlobalDataHandle& data, int replica_id) const { tensorflow::mutex_lock lock(mutex_); TF_ASSIGN_OR_RETURN(std::vector replicated_buffers, ResolveInternal(data)); @@ -196,7 +196,7 @@ StatusOr AllocationTracker::ResolveForReplica( } StatusOr> AllocationTracker::ResolveInternal( - const GlobalDataHandle& data) { + const GlobalDataHandle& data) const { VLOG(2) << "resolve:" << data.handle(); auto it = handle_to_shaped_buffers_.find(data.handle()); if (it == handle_to_shaped_buffers_.end()) { diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h index 43feccee3c..98d1a302a9 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.h +++ b/tensorflow/compiler/xla/service/allocation_tracker.h @@ -65,13 +65,13 @@ class AllocationTracker { // replica, or provide an error status to say whether any of those buffers // were not found (or 
found, but found deallocated). StatusOr> Resolve( - const GlobalDataHandle& data); + const GlobalDataHandle& data) const; // Resolves a handle from an XLA client and replica id to a shaped buffer, or // provide an error status to say whether it was not found (or found, but // found deallocated). StatusOr ResolveForReplica(const GlobalDataHandle& data, - int replica_id); + int replica_id) const; private: // Data structure encapsulating single memory allocation on the device. @@ -87,7 +87,7 @@ class AllocationTracker { // Internal helper which resolves the given GlobalDataHandle to a // list of ScopedShapedBuffers. StatusOr> ResolveInternal( - const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_); + const GlobalDataHandle& data) const EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Internal helper which registers a vector of shaped buffers, one per // replica. ShapedBufferTy is either ScopedShapedBuffer or ShapedBuffer. If @@ -113,7 +113,7 @@ class AllocationTracker { // maintained per device ordinal. using AllocationMap = absl::flat_hash_map; - tensorflow::mutex mutex_; + mutable tensorflow::mutex mutex_; // Backend to use with this tracker. The backend supplies the memory allocator // to use when deallocating memory. 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index b27a92f2a0..084df17951 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -207,7 +207,7 @@ Status Service::ValidateResultShape(const Shape& client_shape, StatusOr>> Service::ResolveAndValidateArguments( absl::Span arguments, - absl::Span stream_executors) { + absl::Span stream_executors) const { CHECK_EQ(options_.number_of_replicas(), stream_executors.size()); std::vector> replicated_arguments; replicated_arguments.resize(options_.number_of_replicas()); @@ -590,7 +590,7 @@ StatusOr> Service::GetExecutors( StatusOr>> Service::GetArguments( const ExecutionOptions& execution_options, - absl::Span arguments) { + absl::Span arguments) const { // Resolve the allocations for the arguments of the computation, and create // a vector of device memory offsets for the arguments from the allocations. // In the case of partitioned computations, assume all arguments go on the diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 1f62fad4c8..8cf1a7b9f0 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -191,7 +191,7 @@ class Service : public ServiceInterface { // Prepare the arguments for executing parallel. StatusOr>> GetArguments( const ExecutionOptions& execution_options, - absl::Span arguments); + absl::Span arguments) const; protected: friend class LocalExecutable; @@ -208,7 +208,7 @@ class Service : public ServiceInterface { StatusOr>> ResolveAndValidateArguments( absl::Span arguments, - absl::Span stream_executors); + absl::Span stream_executors) const; // Create a Hlo module config for the given program shape and arguments. // execution_options is optional; if not given a default is used. -- GitLab From 9bd459e4ceba14f9bb1af98d52a109325de952e8 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 9 Oct 2018 17:14:39 -0700 Subject: [PATCH 166/411] Adds an Objective-C API to TensorFlow Lite experimental. PiperOrigin-RevId: 216451263 --- .../contrib/lite/experimental/objc/BUILD | 94 ++++ .../contrib/lite/experimental/objc/README.md | 10 + .../Configs/TensorFlowLiteObjc.tulsigen | 60 +++ .../project.tulsiconf | 17 + .../experimental/objc/apis/TFLInterpreter.h | 188 ++++++++ .../objc/apis/TFLInterpreterOptions.h | 37 ++ .../objc/apis/TFLQuantizationParameters.h | 36 ++ .../lite/experimental/objc/apis/TFLTensor.h | 77 +++ .../experimental/objc/sources/TFLErrorUtil.h | 51 ++ .../experimental/objc/sources/TFLErrorUtil.m | 45 ++ .../objc/sources/TFLInterpreter.mm | 440 ++++++++++++++++++ .../objc/sources/TFLInterpreterOptions.m | 30 ++ .../objc/sources/TFLQuantizationParameters.m | 23 + .../objc/sources/TFLTensor+Internal.h | 42 ++ .../experimental/objc/sources/TFLTensor.m | 54 +++ .../objc/tests/TFLInterpreterOptionsTests.m | 49 ++ .../objc/tests/TFLInterpreterTests.m | 266 +++++++++++ .../tools/pip_package/pip_smoke_test.py | 1 + 18 files changed, 1520 insertions(+) create mode 100644 tensorflow/contrib/lite/experimental/objc/BUILD create mode 100644 tensorflow/contrib/lite/experimental/objc/README.md create mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen create mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h create mode 100644 
tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m create mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m create mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m diff --git a/tensorflow/contrib/lite/experimental/objc/BUILD b/tensorflow/contrib/lite/experimental/objc/BUILD new file mode 100644 index 0000000000..236b96adb5 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/BUILD @@ -0,0 +1,94 @@ +# TensorFlow Lite Objective-C API. + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) # Apache 2.0 + +load("//tools/build_defs/apple:ios.bzl", "ios_unit_test") + +SOURCES = glob([ + "sources/*.h", + "sources/*.m", + "sources/*.mm", +]) + +API_HEADERS = glob([ + "apis/*.h", +]) + +MINIMUM_OS_VERSION = "8.0" + +# Compiler flags for building regular non-test libraries. +RELEASE_COPTS = [ + # Enables language-specific warnings for Objective-C, Objective-C++, C, and C++. + "-Wall", + # Warns if functions, variables, and types marked with the deprecated attribute are being used. + "-Wdeprecated-declarations", + # Warns for errors in documentation. + "-Wdocumentation", + # Turns all warnings into errors. + "-Werror", + # Enables extra warning flags that are not enabled by -Wall. + "-Wextra", + # Warns if a global function is defined without a previous prototype declaration. + "-Wmissing-prototypes", + # From -Wextra. Disables warning when signed value is converted to unsigned value during comparison. 
+ "-Wno-sign-compare", + # From -Wextra. Disables warning for unused parameters, which are common in delegate methods and block callbacks. + "-Wno-unused-parameter", + # Warns if a global or local variable or type declaration shadows another variable, parameter, type, class member, or instance variable. + "-Wshadow", + # Warns if a function is declared or defined without specifying the argument types. For a block with no args, use (void) instead of (). + "-Wstrict-prototypes", + # Warns if an @selector() expression is encountered with a method name that hasn't been defined yet. + "-Wundeclared-selector", + + # Turn off warnings for headers not part of TensorFlow Lite Objective-C API. + "--system-header-prefix=third_party/tensorflow/contrib/lite/experimental/c/", +] + +# Compiler flags for building test libraries. +TEST_COPTS = RELEASE_COPTS + [ + # From -Wall. Disables warning when passing nil to a callee that requires a non-null argument. + "-Wno-nonnull", + # Disables warning when a global or local variable or type declaration shadows another. 
+ "-Wno-shadow", +] + +objc_library( + name = "TensorFlowLiteObjCLib", + srcs = SOURCES, + hdrs = API_HEADERS, + copts = RELEASE_COPTS, + deps = [ + "//tensorflow/contrib/lite/experimental/c:c_api", + ], + alwayslink = 1, +) + +ios_unit_test( + name = "TensorFlowLiteObjCTests", + size = "small", + minimum_os_version = MINIMUM_OS_VERSION, + deps = [":TensorFlowLiteObjCTestLib"], +) + +objc_library( + name = "TensorFlowLiteObjCTestLib", + testonly = 1, + srcs = glob([ + "tests/*.m", + ]), + hdrs = glob([ + "apis/*.h", + "sources/*.h", + "tests/*.h", + ]), + copts = TEST_COPTS, + resources = [ + "//tensorflow/contrib/lite:testdata/add.bin", + ], + deps = [ + ":TensorFlowLiteObjCLib", + ], +) diff --git a/tensorflow/contrib/lite/experimental/objc/README.md b/tensorflow/contrib/lite/experimental/objc/README.md new file mode 100644 index 0000000000..e8f150b1e8 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/README.md @@ -0,0 +1,10 @@ +# TensorFlow Lite Objective-C API + +## TensorFlowLiteObjc Tulsi Project + +Open the `TensorFlowLiteObjc.tulsiproj` using the Tulsi application on Mac or by +running the following command in Terminal from the root source directory: + +```shell +generate_xcodeproj.sh --genconfig tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj:TensorFlowLiteObjC --outputfolder ~/path/to/xcodeproj +``` diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen new file mode 100644 index 0000000000..babb5902d3 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen @@ -0,0 +1,60 @@ +{ + "sourceFilters" : [ + "third_party/tensorflow/contrib/lite", + "third_party/tensorflow/contrib/lite/experimental/c", + "third_party/tensorflow/contrib/lite/experimental/objc", + 
"third_party/tensorflow/contrib/lite/experimental/objc/apis", + "third_party/tensorflow/contrib/lite/experimental/objc/sources", + "third_party/tensorflow/contrib/lite/experimental/objc/tests", + "third_party/tensorflow/contrib/lite/kernels", + "third_party/tensorflow/contrib/lite/kernels/internal", + "third_party/tensorflow/contrib/lite/nnapi", + "third_party/tensorflow/contrib/lite/schema", + ], + "buildTargets" : [ + "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCLib", + "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCTests", + ], + "projectName" : "TensorFlowLiteObjC", + "optionSet" : { + "LaunchActionPreActionScript" : { + "p" : "$(inherited)" + }, + "BazelBuildStartupOptionsRelease" : { + "p" : "$(inherited)" + }, + "BazelBuildOptionsRelease" : { + "p" : "$(inherited)" + }, + "BazelBuildOptionsDebug" : { + "p" : "$(inherited)" + }, + "EnvironmentVariables" : { + "p" : "$(inherited)" + }, + "BuildActionPreActionScript" : { + "p" : "$(inherited)" + }, + "CommandlineArguments" : { + "p" : "$(inherited)" + }, + "TestActionPreActionScript" : { + "p" : "$(inherited)" + }, + "BazelBuildStartupOptionsDebug" : { + "p" : "$(inherited)" + }, + "BuildActionPostActionScript" : { + "p" : "$(inherited)" + }, + "TestActionPostActionScript" : { + "p" : "$(inherited)" + }, + "LaunchActionPostActionScript" : { + "p" : "$(inherited)" + } + }, + "additionalFilePaths" : [ + "third_party/tensorflow/contrib/lite/experimental/objc/BUILD", + ] +} diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf new file mode 100644 index 0000000000..00299cd4cf --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf @@ -0,0 +1,17 @@ +{ + "configDefaults" : { + "optionSet" : { + "BazelBuildOptionsDebug" : { + "p" : "--ios_minimum_os=8.0" + }, + 
"BazelBuildOptionsRelease" : { + "p" : "--ios_minimum_os=8.0" + }, + } + }, + "projectName" : "TensorFlowLiteObjC", + "packages" : [ + "third_party/tensorflow/contrib/lite/experimental/objc" + ], + "workspaceRoot" : "../../../../../../.." +} diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h new file mode 100644 index 0000000000..c07ffc06ff --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h @@ -0,0 +1,188 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +@class TFLInterpreterOptions; +@class TFLTensor; + +NS_ASSUME_NONNULL_BEGIN + +/** + * @enum TFLInterpreterErrorCode + * This enum specifies various error codes related to `TFLInterpreter`. + */ +typedef NS_ENUM(NSUInteger, TFLInterpreterErrorCode) { + /** Provided tensor index is invalid. */ + TFLInterpreterErrorCodeInvalidTensorIndex, + + /** Input data has invalid byte size. */ + TFLInterpreterErrorCodeInvalidInputByteSize, + + /** Provided shape is invalid. It must be a non-empty array of positive unsigned integers. */ + TFLInterpreterErrorCodeInvalidShape, + + /** Provided model cannot be loaded. */ + TFLInterpreterErrorCodeFailedToLoadModel, + + /** Failed to create `TFLInterpreter`. */ + TFLInterpreterErrorCodeFailedToCreateInterpreter, + + /** Failed to invoke `TFLInterpreter`. 
*/ + TFLInterpreterErrorCodeFailedToInvoke, + + /** Failed to retrieve a tensor. */ + TFLInterpreterErrorCodeFailedToGetTensor, + + /** Failed to resize an input tensor. */ + TFLInterpreterErrorCodeFailedToResizeInputTensor, + + /** Failed to copy data into an input tensor. */ + TFLInterpreterErrorCodeFailedToCopyDataToInputTensor, + + /** Failed to get data from an output tensor. */ + TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor, + + /** Failed to allocate memory for tensors. */ + TFLInterpreterErrorCodeFailedToAllocateTensors, + + /** Operation not allowed without allocating memory for tensors first. */ + TFLInterpreterErrorCodeAllocateTensorsRequired, + + /** Operation not allowed without invoking the interpreter first. */ + TFLInterpreterErrorCodeInvokeInterpreterRequired, +}; + +/** + * A TensorFlow Lite model interpreter. + */ +@interface TFLInterpreter : NSObject + +/** The total number of input tensors. 0 if the interpreter creation failed. */ +@property(nonatomic, readonly) NSUInteger inputTensorCount; + +/** The total number of output tensors. 0 if the interpreter creation failed. */ +@property(nonatomic, readonly) NSUInteger outputTensorCount; + +/** Unavailable. */ +- (instancetype)init NS_UNAVAILABLE; + +/** + * Initializes a new TensorFlow Lite interpreter instance with the given model file path and the + * default interpreter options. + * + * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. + * + * @return A new instance of `TFLInterpreter` with the given model and the default interpreter + * options. + */ +- (instancetype)initWithModelPath:(NSString *)modelPath; + +/** + * Initializes a new TensorFlow Lite interpreter instance with the given model file path and + * options. + * + * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. + * @param options Options to use for configuring the TensorFlow Lite interpreter.
+ * + * @return A new instance of `TFLInterpreter` with the given model and options. + */ +- (instancetype)initWithModelPath:(NSString *)modelPath + options:(TFLInterpreterOptions *)options NS_DESIGNATED_INITIALIZER; + +/** + * Invokes the interpreter to run inference. + * + * @param error An optional error parameter populated when there is an error in invoking the + * interpreter. + * + * @return Whether the invocation is successful. Returns NO if an error occurred. + */ +- (BOOL)invokeWithError:(NSError **)error; + +/** + * Returns the input tensor at the given index. + * + * @param index The index of an input tensor. + * @param error An optional error parameter populated when there is an error in looking up the input + * tensor. + * + * @return The input tensor at the given index. `nil` if there is an error. + */ +- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error; + +/** + * Returns the output tensor at the given index. + * + * @param index The index of an output tensor. + * @param error An optional error parameter populated when there is an error in looking up the + * output tensor. + * + * @return The output tensor at the given index. `nil` if there is an error. + */ +- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error; + +/** + * Resizes the input tensor at the given index to the specified shape (an array of positive unsigned + * integers). + * + * @param index The index of an input tensor. + * @param shape Shape that the given input tensor should be resized to. It should be an array of + * positive unsigned integer(s) containing the size of each dimension. + * @param error An optional error parameter populated when there is an error in resizing the input + * tensor. + * + * @return Whether the input tensor was resized successfully. Returns NO if an error occurred. 
+ */ +- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index + toShape:(NSArray *)shape + error:(NSError **)error; + +/** + * Copies the given data into the input tensor at the given index. This is allowed only before the + * interpreter is invoked. + * + * @param data The data to set. The byte size of the data must match what's required by the given + * input tensor. + * @param index The index of an input tensor. + * @param error An optional error parameter populated when there is an error in setting the data. + * + * @return Whether the data was set into the input tensor successfully. Returns NO if an error + * occurred. + */ +- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error; + +/** + * Gets the data from the output tensor at the given index. The interpreter invocation has to + * complete before the data can be retrieved from an output tensor. + * + * @param index The index of an output tensor. + * @param error An optional error parameter populated when there is an error in getting the data. + * + * @return The data of the output tensor at the given index. `nil` if there is an error. + */ +- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error; + +/** + * Allocates memory for tensors. + * + * @param error An optional error parameter populated when there is an error in allocating memory. + * + * @return Whether memory allocation is successful. Returns NO if an error occurred. + */ +- (BOOL)allocateTensorsWithError:(NSError **)error; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h new file mode 100644 index 0000000000..6461fbf017 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h @@ -0,0 +1,37 @@ +// Copyright 2018 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +NS_ASSUME_NONNULL_BEGIN + +/** Custom configuration options for a TensorFlow Lite interpreter. */ +@interface TFLInterpreterOptions : NSObject + +/** + * Maximum number of threads that the interpreter should run on. Defaults to 0 (unspecified, letting + * TensorFlow Lite optimize the threading decision). + */ +@property(nonatomic) NSUInteger numberOfThreads; + +/** + * Initializes a new instance of `TFLInterpreterOptions`. + * + * @return A new instance of `TFLInterpreterOptions`. + */ +- (instancetype)init NS_DESIGNATED_INITIALIZER; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h new file mode 100644 index 0000000000..3d5cf793c5 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h @@ -0,0 +1,36 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +NS_ASSUME_NONNULL_BEGIN + +/** + * Parameters for asymmetric quantization. Quantized values can be converted to float values using: + * `realValue = scale * (quantizedValue - zeroPoint)`. + */ +@interface TFLQuantizationParameters : NSObject + +/** Scale of asymmetric quantization. */ +@property(nonatomic, readonly) float scale; + +/** Zero point of asymmetric quantization. */ +@property(nonatomic, readonly) int32_t zeroPoint; + +/** Unavailable. */ +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h new file mode 100644 index 0000000000..d08b8fc0e9 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h @@ -0,0 +1,77 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +@class TFLQuantizationParameters; + +NS_ASSUME_NONNULL_BEGIN + +/** + * @enum TFLTensorDataType + * This enum specifies supported TensorFlow Lite tensor data types. + */ +typedef NS_ENUM(NSUInteger, TFLTensorDataType) { + /** Tensor data type not available. This indicates an error with the model. */ + TFLTensorDataTypeNoType, + + /** 32-bit single precision floating point. */ + TFLTensorDataTypeFloat32, + + /** 32-bit signed integer. 
*/ + TFLTensorDataTypeInt32, + + /** 8-bit unsigned integer. */ + TFLTensorDataTypeUInt8, + + /** 64-bit signed integer. */ + TFLTensorDataTypeInt64, + + /** Boolean. */ + TFLTensorDataTypeBool, + + /** 16-bit signed integer. */ + TFLTensorDataTypeInt16, +}; + +/** + * An input or output tensor in a TensorFlow Lite model. + */ +@interface TFLTensor : NSObject + +/** Name of the tensor. */ +@property(nonatomic, readonly, copy) NSString *name; + +/** Data type of the tensor. */ +@property(nonatomic, readonly) TFLTensorDataType dataType; + +/** + * Shape of the tensor, an array of positive unsigned integer(s) containing the size of each + * dimension. For example: the shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is + * [2, 2, 3]. + */ +@property(nonatomic, readonly, copy) NSArray *shape; + +/** Number of bytes for the tensor data. */ +@property(nonatomic, readonly) NSUInteger byteSize; + +/** Parameters for asymmetric quantization. `nil` if the tensor does not use quantization. */ +@property(nonatomic, readonly, nullable) TFLQuantizationParameters *quantizationParameters; + +/** Unavailable. */ +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h new file mode 100644 index 0000000000..b6fd4763d6 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h @@ -0,0 +1,51 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Helper utility for error reporting. */ +@interface TFLErrorUtil : NSObject + +/** + * Creates and returns an interpreter error with the given error code and description. + * + * @param code Error code. + * @param description Error description. + * + * @return The created interpreter error with the given error code and description. + */ ++ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code + description:(NSString *)description; + +/** + * Creates and saves an interpreter error with the given error code and description. + * + * @param code Error code. + * @param description Error description. + * @param error Pointer to where to save the created error. If `nil`, no error will be saved. + */ ++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code + description:(NSString *)description + error:(NSError **)error; + +/** Unavailable. */ +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m new file mode 100644 index 0000000000..756d69481c --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m @@ -0,0 +1,45 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "TFLErrorUtil.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Error domain of TensorFlow Lite interpreter related errors. */ +static NSString *const TFLInterpreterErrorDomain = @"org.tensorflow.lite.interpreter"; + +@implementation TFLErrorUtil + +#pragma mark - Public + ++ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code + description:(NSString *)description { + return [NSError errorWithDomain:TFLInterpreterErrorDomain + code:code + userInfo:@{NSLocalizedDescriptionKey : description}]; +} + ++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code + description:(NSString *)description + error:(NSError **)error { + if (error) { + *error = [NSError errorWithDomain:TFLInterpreterErrorDomain + code:code + userInfo:@{NSLocalizedDescriptionKey : description}]; + } +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm new file mode 100644 index 0000000000..0f940a5cf3 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm @@ -0,0 +1,440 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h" + +#import "TFLErrorUtil.h" +#import "TFLTensor+Internal.h" +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h" +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h" + +#include "third_party/tensorflow/contrib/lite/experimental/c/c_api.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * @enum TFLTensorType + * This enum specifies input or output tensor types. + */ +typedef NS_ENUM(NSUInteger, TFLTensorType) { + /** Input tensor type. */ + TFLTensorTypeInput, + + /** Output tensor type. */ + TFLTensorTypeOutput, +}; + +// Names used for indicating input or output in error messages. +static NSString *const kTFLInputDirection = @"input"; +static NSString *const kTFLOutputDirection = @"output"; + +/** + * Error reporter for TFLInterpreter. + * + * @param user_data User data. Not used. + * @param format Error message which may contain argument formatting specifiers. + * @param args Values of the arguments in the error message. + */ +static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_list args) { + NSLog(@"%@", [[NSString alloc] initWithFormat:@(format) arguments:args]); +} + +@interface TFLInterpreter () + +/** TFL_Interpreter backed by C API. */ +@property(nonatomic, nullable) TFL_Interpreter *interpreter; + +/** + * An error in initializing the interpreter. If not `nil`, this error will be reported when the + * interpreter is used. 
+ */ +@property(nonatomic, nullable) NSError *initializationError; + +@end + +@implementation TFLInterpreter + +#pragma mark - NSObject + +- (void)dealloc { + TFL_DeleteInterpreter(_interpreter); +} + +#pragma mark - Public + +- (instancetype)initWithModelPath:(NSString *)modelPath { + return [self initWithModelPath:modelPath options:[[TFLInterpreterOptions alloc] init]]; +} + +- (instancetype)initWithModelPath:(NSString *)modelPath options:(TFLInterpreterOptions *)options { + self = [super init]; + + if (self != nil) { + const char *modelPathCString = modelPath.UTF8String; + NSString *pathErrorString = + [NSString stringWithFormat:@"Cannot load model from path (%@).", modelPath]; + if (modelPathCString == nullptr) { + _initializationError = + [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel + description:pathErrorString]; + return self; + } + + TFL_Model *model = TFL_NewModelFromFile(modelPathCString); + if (model == nullptr) { + _initializationError = + [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel + description:pathErrorString]; + return self; + } + + TFL_InterpreterOptions *cOptions = TFL_NewInterpreterOptions(); + if (cOptions == nullptr) { + _initializationError = + [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter + description:@"Failed to create the interpreter."]; + TFL_DeleteModel(model); + return self; + } + + if (options.numberOfThreads > 0) { + TFL_InterpreterOptionsSetNumThreads(cOptions, (int32_t)options.numberOfThreads); + } + TFL_InterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr); + + _interpreter = TFL_NewInterpreter(model, cOptions); + if (_interpreter == nullptr) { + _initializationError = + [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter + description:@"Failed to create the interpreter."]; + } else { + _inputTensorCount = 
(NSUInteger)TFL_InterpreterGetInputTensorCount(_interpreter); + _outputTensorCount = (NSUInteger)TFL_InterpreterGetOutputTensorCount(_interpreter); + if (_inputTensorCount <= 0 || _outputTensorCount <= 0) { + _initializationError = + [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter + description:@"Failed to create the interpreter."]; + } + } + TFL_DeleteInterpreterOptions(cOptions); + TFL_DeleteModel(model); + } + + return self; +} + +- (BOOL)invokeWithError:(NSError **)error { + if (self.initializationError != nil) { + [self saveInitializationErrorToDestination:error]; + return NO; + } + + if (TFL_InterpreterInvoke(self.interpreter) != kTfLiteOk) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToInvoke + description:@"Failed to invoke the interpreter." + error:error]; + return NO; + } + + return YES; +} + +- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error { + if (self.initializationError != nil) { + [self saveInitializationErrorToDestination:error]; + return nil; + } + + if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) { + return nil; + } + + return [self tensorOfType:TFLTensorTypeInput atIndex:index error:error]; +} + +- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error { + if (self.initializationError != nil) { + [self saveInitializationErrorToDestination:error]; + return nil; + } + + if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) { + return nil; + } + + return [self tensorOfType:TFLTensorTypeOutput atIndex:index error:error]; +} + +- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index + toShape:(NSArray *)shape + error:(NSError **)error { + if (self.initializationError != nil) { + [self saveInitializationErrorToDestination:error]; + return NO; + } + + if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) { + return NO; + } + + if
(shape.count == 0) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape + description:@"Invalid shape. Must not be empty." + error:error]; + return NO; + } + + int cDimensions[shape.count]; // One slot per dimension of the requested shape. + for (int d = 0; d < shape.count; ++d) { + int dimension = shape[d].intValue; + if (dimension <= 0) { + NSString *errorDescription = @"Invalid shape. Dimensions must be positive integers."; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape + description:errorDescription + error:error]; + return NO; + } + cDimensions[d] = dimension; + } + + if (TFL_InterpreterResizeInputTensor(self.interpreter, (int32_t)index, cDimensions, + (int32_t)shape.count) != kTfLiteOk) { + NSString *errorDescription = [NSString + stringWithFormat:@"Failed to resize input tensor at index (%lu).", (unsigned long)index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToResizeInputTensor + description:errorDescription + error:error]; + return NO; + } + + return YES; +} + +- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error { + if (self.initializationError != nil) { + [self saveInitializationErrorToDestination:error]; + return NO; + } + + if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) { + return NO; + } + + TFL_Tensor *tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index); + if (tensor == nullptr) { + NSString *errorDescription = [NSString + stringWithFormat:@"Failed to get input tensor at index (%lu).", (unsigned long)index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor + description:errorDescription + error:error]; + return NO; + } + + NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor); + if (data.length != byteSize) { + NSString *errorDescription = [NSString + stringWithFormat:@"Input tensor at index (%lu) expects data size (%lu), but got (%lu).", +
(unsigned long)index, byteSize, (unsigned long)data.length]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidInputByteSize + description:errorDescription + error:error]; + return NO; + } + + if (TFL_TensorCopyFromBuffer(tensor, data.bytes, data.length) != kTfLiteOk) { + NSString *errorDescription = + [NSString stringWithFormat:@"Failed to copy data into input tensor at index (%lu).", + (unsigned long)index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor + description:errorDescription + error:error]; + return NO; + } + + return YES; +} + +- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error { + if (self.initializationError != nil) { + [self saveInitializationErrorToDestination:error]; + return nil; + } + + if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) { + return nil; + } + + const TFL_Tensor *tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index); + if (tensor == nullptr) { + NSString *errorDescription = [NSString + stringWithFormat:@"Failed to get output tensor at index (%lu).", (unsigned long)index]; + [TFLErrorUtil + saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor + description:errorDescription + error:error]; + return nil; + } + + void *bytes = TFL_TensorData(tensor); + NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor); + if (bytes == nullptr || byteSize == 0) { + NSString *errorDescription = [NSString + stringWithFormat:@"Failed to get output tensor data at index (%lu).", (unsigned long)index]; + [TFLErrorUtil + saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor + description:errorDescription + error:error]; + return nil; + } + + return [NSData dataWithBytes:bytes length:byteSize]; +} + +- (BOOL)allocateTensorsWithError:(NSError **)error { + if (self.initializationError != nil) { + [self 
saveInitializationErrorToDestination:error]; + return NO; + } + + if (TFL_InterpreterAllocateTensors(self.interpreter) != kTfLiteOk) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToAllocateTensors + description:@"Failed to allocate memory for tensors." + error:error]; + return NO; + } + return YES; +} + +#pragma mark - Private + +- (nullable TFLTensor *)tensorOfType:(TFLTensorType)type + atIndex:(NSUInteger)index + error:(NSError **)error { + const TFL_Tensor *tensor = nullptr; + NSString *tensorType; + switch (type) { + case TFLTensorTypeInput: + tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index); + tensorType = kTFLInputDirection; + break; + case TFLTensorTypeOutput: + tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index); + tensorType = kTFLOutputDirection; + break; + } + + if (tensor == nullptr) { + NSString *errorDescription = + [NSString stringWithFormat:@"Failed to get %@ tensor at index (%lu).", tensorType, + (unsigned long)index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor + description:errorDescription + error:error]; + return nil; + } + + const char *cName = TFL_TensorName(tensor); + if (cName == nullptr) { + NSString *errorDescription = + [NSString stringWithFormat:@"Failed to get name of %@ tensor at index (%lu).", tensorType, + (unsigned long)index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor + description:errorDescription + error:error]; + return nil; + } + NSString *name = [NSString stringWithUTF8String:cName]; + + TFLTensorDataType dataType = [self tensorDataTypeFromCTensorType:TFL_TensorType(tensor)]; + + int32_t rank = TFL_TensorNumDims(tensor); + if (rank <= 0) { + NSString *errorDescription = + [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid rank (%d).", tensorType, + (unsigned long)index, rank]; + [TFLErrorUtil 
saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor + description:errorDescription + error:error]; + return nil; + } + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:rank]; + for (int32_t d = 0; d < rank; d++) { + int32_t dimension = TFL_TensorDim(tensor, d); + if (dimension <= 0) { + NSString *errorDescription = + [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid %d-th dimension (%d).", + tensorType, (unsigned long)index, d, dimension]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor + description:errorDescription + error:error]; + return nil; + } + shape[d] = @((NSUInteger)dimension); + } + + // TODO: Set quantization parameters when C API supports it. + return [[TFLTensor alloc] initWithName:name + dataType:dataType + shape:shape + byteSize:(NSUInteger)TFL_TensorByteSize(tensor) + quantizationParameters:nil]; +} + +- (TFLTensorDataType)tensorDataTypeFromCTensorType:(TFL_Type)cTensorType { + switch (cTensorType) { + case kTfLiteFloat32: + return TFLTensorDataTypeFloat32; + case kTfLiteInt32: + return TFLTensorDataTypeInt32; + case kTfLiteUInt8: + return TFLTensorDataTypeUInt8; + case kTfLiteInt64: + return TFLTensorDataTypeInt64; + case kTfLiteBool: + return TFLTensorDataTypeBool; + case kTfLiteInt16: + return TFLTensorDataTypeInt16; + case kTfLiteNoType: + case kTfLiteString: + case kTfLiteComplex64: + // kTfLiteString and kTfLiteComplex64 are not supported in TensorFlow Lite Objc API. 
+ return TFLTensorDataTypeNoType; + } +} + +- (void)saveInitializationErrorToDestination:(NSError **)destination { + if (destination != NULL) { + *destination = self.initializationError; + } +} + +- (BOOL)isValidTensorIndex:(NSUInteger)index + belowLimit:(NSUInteger)totalTensorCount + error:(NSError **)error { + if (index >= totalTensorCount) { + NSString *errorDescription = + [NSString stringWithFormat:@"Invalid tensor index (%lu); tensor count is (%lu).", + (unsigned long)index, (unsigned long)totalTensorCount]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensorIndex + description:errorDescription + error:error]; + return NO; + } + + return YES; +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m new file mode 100644 index 0000000000..1776688288 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m @@ -0,0 +1,30 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h" + +NS_ASSUME_NONNULL_BEGIN + +@implementation TFLInterpreterOptions + +#pragma mark - Public + +- (instancetype)init { + self = [super init]; + return self; +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m new file mode 100644 index 0000000000..190f0479ce --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m @@ -0,0 +1,23 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h" + +NS_ASSUME_NONNULL_BEGIN + +@implementation TFLQuantizationParameters + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h new file mode 100644 index 0000000000..f2f13e5e5f --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h @@ -0,0 +1,42 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface TFLTensor (Internal) + +/** + * Initializes a `TFLTensor` with the given name, data type, shape, and quantization parameters. + * + * @param name Name of the tensor. + * @param dataType Data type of the tensor. + * @param shape Shape of the tensor. + * @param byteSize Size of the tensor data in number of bytes. + * @param quantizationParameters Quantization parameters of the tensor. `nil` if the tensor does not + * use quantization. + * + * @return A new instance of `TFLTensor` with the given name, data type, shape, and quantization + * parameters. + */ +- (instancetype)initWithName:(NSString *)name + dataType:(TFLTensorDataType)dataType + shape:(NSArray *)shape + byteSize:(NSUInteger)byteSize + quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m new file mode 100644 index 0000000000..adb1c5ad2c --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m @@ -0,0 +1,54 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h" + +#import "TFLTensor+Internal.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface TFLTensor () + +// Redefines readonly properties. +@property(nonatomic, copy) NSString *name; +@property(nonatomic) TFLTensorDataType dataType; +@property(nonatomic, copy) NSArray *shape; +@property(nonatomic) NSUInteger byteSize; +@property(nonatomic, nullable) TFLQuantizationParameters *quantizationParameters; + +@end + +@implementation TFLTensor + +#pragma mark - TFLTensor (Internal) + +- (instancetype)initWithName:(NSString *)name + dataType:(TFLTensorDataType)dataType + shape:(NSArray *)shape + byteSize:(NSUInteger)byteSize + quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters { + self = [super init]; + if (self != nil) { + _name = [name copy]; + _dataType = dataType; + _shape = [shape copy]; + _byteSize = byteSize; + _quantizationParameters = quantizationParameters; + } + return self; +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m new file mode 100644 index 0000000000..17c495fa18 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m @@ -0,0 +1,49 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h" + +#import <XCTest/XCTest.h> + +NS_ASSUME_NONNULL_BEGIN + +/** + * Unit tests for TFLInterpreterOptions. + */ +@interface TFLInterpreterOptionsTests : XCTestCase +@end + +@implementation TFLInterpreterOptionsTests + +#pragma mark - Tests + +- (void)testInit { + TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; + XCTAssertNotNil(options); + XCTAssertEqual(options.numberOfThreads, 0); +} + +- (void)testSetNumberOfThread { + TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; + options.numberOfThreads = 2; + XCTAssertEqual(options.numberOfThreads, 2); + options.numberOfThreads = 0; + XCTAssertEqual(options.numberOfThreads, 0); + options.numberOfThreads = 3; + XCTAssertEqual(options.numberOfThreads, 3); +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m new file mode 100644 index 0000000000..9e6319a732 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m @@ -0,0 +1,266 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h" + +#import <XCTest/XCTest.h> + +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h" +#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Model resource name. */ +static NSString *const kAddModelResourceName = @"add"; + +/** Model resource type. */ +static NSString *const kAddModelResourceType = @"bin"; + +/** Rank of the input and output tensor in the Add model. */ +static const NSUInteger kAddModelTensorRank = 1U; + +/** Size of the first (and only) dimension of the input and output tensor in the Add model. */ +static const NSUInteger kAddModelTensorFirstDimensionSize = 2U; + +/** Invalid input tensor index. */ +static const NSUInteger kInvalidInputTensorIndex = 1U; + +/** Invalid output tensor index. */ +static const NSUInteger kInvalidOutputTensorIndex = 1U; + +/** Accuracy used when comparing floating-point numbers. */ +static const float kTestAccuracy = 1E-5F; + +/** + * Unit tests for TFLInterpreter. + */ +@interface TFLInterpreterTests : XCTestCase + +/** Absolute path of the Add model resource. */ +@property(nonatomic, nullable) NSString *modelPath; + +/** Default interpreter using the Add model. 
*/ +@property(nonatomic, nullable) TFLInterpreter *interpreter; + +@end + +@implementation TFLInterpreterTests + +#pragma mark - XCTestCase + +- (void)setUp { + [super setUp]; + + NSBundle *bundle = [NSBundle bundleForClass:[self class]]; + self.modelPath = [bundle pathForResource:kAddModelResourceName ofType:kAddModelResourceType]; + self.interpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath]; + XCTAssertNotNil(self.interpreter); + XCTAssertTrue([self.interpreter allocateTensorsWithError:nil]); +} + +- (void)tearDown { + self.modelPath = nil; + self.interpreter = nil; + + [super tearDown]; +} + +#pragma mark - Tests + +- (void)testSuccessfulFullRun { + // Shape for both input and output tensor. + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; + + // Creates the interpreter options. + TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; + XCTAssertNotNil(options); + options.numberOfThreads = 2; + + // Creates the interpreter. + TFLInterpreter *customInterpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath + options:options]; + XCTAssertNotNil(customInterpreter); + + // Allocates memory for tensors. + NSError *error; + XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]); + XCTAssertNil(error); + + // Verifies input and output tensor counts. + XCTAssertEqual(customInterpreter.inputTensorCount, 1); + XCTAssertEqual(customInterpreter.outputTensorCount, 1); + + // Resizes the input tensor. + XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); + XCTAssertNil(error); + + // Re-allocates memory for tensors. + XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]); + XCTAssertNil(error); + + // Verifies the input tensor. 
+ TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(inputTensor); + XCTAssertNil(error); + XCTAssertTrue([inputTensor.name isEqualToString:@"input"]); + XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeFloat32); + XCTAssertTrue([shape isEqualToArray:inputTensor.shape]); + XCTAssertEqual(inputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize); + + // Copies the input data. + NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; + float one = 1.f; + float three = 3.f; + [inputData appendBytes:&one length:sizeof(float)]; + [inputData appendBytes:&three length:sizeof(float)]; + XCTAssertTrue([customInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]); + XCTAssertNil(error); + + // Invokes the interpreter. + XCTAssertTrue([customInterpreter invokeWithError:&error]); + XCTAssertNil(error); + + // Verifies the output tensor. + TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(outputTensor); + XCTAssertNil(error); + XCTAssertTrue([outputTensor.name isEqualToString:@"output"]); + XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeFloat32); + XCTAssertTrue([shape isEqualToArray:outputTensor.shape]); + XCTAssertEqual(outputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize); + + // Tries to query an invalid output tensor index. + TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex + error:&error]; + XCTAssertNil(invalidOutputTensor); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); + + // Gets the output tensor data. 
+ error = nil; + NSData *outputData = [customInterpreter dataFromOutputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(outputData); + XCTAssertNil(error); + float output[kAddModelTensorFirstDimensionSize]; + [outputData getBytes:output length:(sizeof(float) * kAddModelTensorFirstDimensionSize)]; + XCTAssertEqualWithAccuracy(output[0], 3.f, kTestAccuracy); + XCTAssertEqualWithAccuracy(output[1], 9.f, kTestAccuracy); +} + +- (void)testInitWithModelPath_invalidPath { + // Shape for both input and output tensor. + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; + + // Creates the interpreter. + TFLInterpreter *brokenInterpreter = [[TFLInterpreter alloc] initWithModelPath:@"InvalidPath"]; + XCTAssertNotNil(brokenInterpreter); + XCTAssertEqual(brokenInterpreter.inputTensorCount, 0); + XCTAssertEqual(brokenInterpreter.outputTensorCount, 0); + + // Allocates memory for tensors. + NSError *error; + XCTAssertFalse([brokenInterpreter allocateTensorsWithError:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); + + // Resizes the input tensor. + XCTAssertFalse([brokenInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); + + // Verifies the input tensor. + TFLTensor *inputTensor = [brokenInterpreter inputTensorAtIndex:0 error:&error]; + XCTAssertNil(inputTensor); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); + + // Copies the input data. 
+ NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; + float one = 1.f; + float three = 3.f; + [inputData appendBytes:&one length:sizeof(float)]; + [inputData appendBytes:&three length:sizeof(float)]; + XCTAssertFalse([brokenInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); + + // Invokes the interpreter. + XCTAssertFalse([brokenInterpreter invokeWithError:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); + + // Verifies the output tensor. + TFLTensor *outputTensor = [brokenInterpreter outputTensorAtIndex:0 error:&error]; + XCTAssertNil(outputTensor); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); + + // Gets the output tensor data. + NSData *outputData = [brokenInterpreter dataFromOutputTensorAtIndex:0 error:&error]; + XCTAssertNil(outputData); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); +} + +- (void)testInvoke_beforeAllocation { + TFLInterpreter *interpreterWithoutAllocation = + [[TFLInterpreter alloc] initWithModelPath:self.modelPath]; + XCTAssertNotNil(interpreterWithoutAllocation); + + NSError *error; + XCTAssertFalse([interpreterWithoutAllocation invokeWithError:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToInvoke); +} + +- (void)testInputTensorAtIndex_invalidIndex { + NSError *error; + TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:kInvalidInputTensorIndex + error:&error]; + XCTAssertNil(inputTensor); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); +} + +- (void)testResizeInputTensorAtIndex_invalidIndex { + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; + NSError *error; + XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:kInvalidInputTensorIndex + toShape:shape + error:&error]); + 
XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); +} + +- (void)testResizeInputTensorAtIndex_emptyShape { + NSMutableArray *emptyShape = [NSMutableArray arrayWithCapacity:0]; + NSError *error; + XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:emptyShape error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape); +} + +- (void)testResizeInputTensorAtIndex_zeroDimensionSize { + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber numberWithUnsignedInteger:0]; + NSError *error; + XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape); +} + +- (void)testCopyDataToInputTensorAtIndex_invalidInputDataByteSize { + NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; + float one = 1.f; + float three = 3.f; + [inputData appendBytes:&one length:sizeof(float)]; + [inputData appendBytes:&three length:(sizeof(float) - 1)]; + NSError *error; + XCTAssertFalse([self.interpreter copyData:inputData toInputTensorAtIndex:0 error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidInputByteSize); +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index c6ef82ccdc..31b68c8f00 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -105,6 +105,7 @@ BLACKLIST = [ "//tensorflow/contrib/timeseries/python/timeseries:test_utils", "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils", # pylint:disable=line-too-long "//tensorflow/contrib/image:sparse_image_warp_test_data", + "//tools/build_defs/apple:ios.bzl", ] -- GitLab From 5be479930d3dcfa3edb863703b1d73b89d45f03c Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 9 Oct 2018 17:19:24 -0700 Subject: [PATCH 167/411] 
[XLA:GPU] Use CudnnConvKind in more places. No functional change. PiperOrigin-RevId: 216451881 --- tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../service/gpu/cudnn_convolution_runner.cc | 99 ++++++++++--------- .../xla/service/gpu/pad_for_tensor_cores.cc | 84 +++++++++------- .../compiler/xla/service/gpu/pad_insertion.cc | 31 +++--- 4 files changed, 116 insertions(+), 99 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 0144d59097..62da43d68a 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -591,6 +591,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:window_util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_casting_utils", "//tensorflow/compiler/xla/service:hlo_creation_utils", "//tensorflow/compiler/xla/service:hlo_pass", "//tensorflow/compiler/xla/service:shape_inference", diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc index 89dd1bb272..a809c22b33 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc @@ -312,11 +312,12 @@ StatusOr<CudnnConvParams> GetCudnnConvParams( TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config, conv->backend_config<CudnnConvBackendConfig>()); - const auto& target = conv->custom_call_target(); + TF_ASSIGN_OR_RETURN(CudnnConvKind kind, GetCudnnConvKind(conv)); const auto& lhs_shape = conv->operand(0)->shape(); const auto& rhs_shape = conv->operand(1)->shape(); const auto& conv_result_shape = conv->shape().tuple_shapes(0); + params.kind = kind; params.window = &conv->window(); params.dnums = &conv->convolution_dimension_numbers(); params.feature_group_count = conv->feature_group_count(); @@ -324,55 +325,55 @@ StatusOr<CudnnConvParams> GetCudnnConvParams( backend_config.algorithm(), 
backend_config.tensor_ops_enabled())); params.conv_result_scale = backend_config.conv_result_scale(); - if (target == kCudnnConvForwardCallTarget) { - params.kind = CudnnConvKind::kForward; - params.input_shape = &lhs_shape; - params.filter_shape = &rhs_shape; - params.output_shape = &conv_result_shape; - params.input_buf = operand_buffers[0]; - params.filter_buf = operand_buffers[1]; - params.output_buf = result_buffer; - } else if (target == kCudnnConvBackwardInputCallTarget) { - params.kind = CudnnConvKind::kBackwardInput; - params.input_shape = &conv_result_shape; - params.filter_shape = &rhs_shape; - params.output_shape = &lhs_shape; - params.input_buf = result_buffer; - params.filter_buf = operand_buffers[1]; - params.output_buf = operand_buffers[0]; - } else if (target == kCudnnConvBackwardFilterCallTarget) { - params.kind = CudnnConvKind::kBackwardFilter; - params.input_shape = &lhs_shape; - params.filter_shape = &conv_result_shape; - params.output_shape = &rhs_shape; - params.input_buf = operand_buffers[0]; - params.filter_buf = result_buffer; - params.output_buf = operand_buffers[1]; - } else if (target == kCudnnConvBiasActivationForwardCallTarget) { - params.kind = CudnnConvKind::kForwardActivation; - params.input_shape = &lhs_shape; - params.filter_shape = &rhs_shape; - params.output_shape = &conv_result_shape; - params.fusion.emplace(); - auto& fusion = *params.fusion; - if (backend_config.activation_mode() < - static_cast<int64>(se::dnn::ActivationMode::kNumActivationModes)) { - fusion.mode = static_cast<se::dnn::ActivationMode>( - backend_config.activation_mode()); - } else { - return InternalError("Bad activation mode: %s", - backend_config.ShortDebugString()); - } - fusion.side_input_scale = backend_config.side_input_scale(); - params.input_buf = operand_buffers[0]; - params.filter_buf = operand_buffers[1]; - params.output_buf = result_buffer; - params.fusion->bias_buf = operand_buffers[2]; - if (operand_buffers.size() >= 4) { - params.fusion->side_input_buf = operand_buffers[3]; 
+ switch (kind) { + case CudnnConvKind::kForward: + params.input_shape = &lhs_shape; + params.filter_shape = &rhs_shape; + params.output_shape = &conv_result_shape; + params.input_buf = operand_buffers[0]; + params.filter_buf = operand_buffers[1]; + params.output_buf = result_buffer; + break; + case CudnnConvKind::kBackwardInput: + params.input_shape = &conv_result_shape; + params.filter_shape = &rhs_shape; + params.output_shape = &lhs_shape; + params.input_buf = result_buffer; + params.filter_buf = operand_buffers[1]; + params.output_buf = operand_buffers[0]; + break; + case CudnnConvKind::kBackwardFilter: + params.input_shape = &lhs_shape; + params.filter_shape = &conv_result_shape; + params.output_shape = &rhs_shape; + params.input_buf = operand_buffers[0]; + params.filter_buf = result_buffer; + params.output_buf = operand_buffers[1]; + break; + case CudnnConvKind::kForwardActivation: { + params.kind = CudnnConvKind::kForwardActivation; + params.input_shape = &lhs_shape; + params.filter_shape = &rhs_shape; + params.output_shape = &conv_result_shape; + params.fusion.emplace(); + auto& fusion = *params.fusion; + if (backend_config.activation_mode() < + static_cast<int64>(se::dnn::ActivationMode::kNumActivationModes)) { + fusion.mode = static_cast<se::dnn::ActivationMode>( + backend_config.activation_mode()); + } else { + return InternalError("Bad activation mode: %s", + backend_config.ShortDebugString()); + } + fusion.side_input_scale = backend_config.side_input_scale(); + params.input_buf = operand_buffers[0]; + params.filter_buf = operand_buffers[1]; + params.output_buf = result_buffer; + params.fusion->bias_buf = operand_buffers[2]; + if (operand_buffers.size() >= 4) { + params.fusion->side_input_buf = operand_buffers[3]; + } } - } else { - return InternalError("Unexpected custom call target: %s", target); } return params; } diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc index e3869b5c36..8f1f5a7bf5 100644 
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc +++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc @@ -105,38 +105,45 @@ static HloInstruction* PadInstruction(HloInstruction* instr, // Pads the input/output feature dimensions of the given cudnn convolution // custom-call to be multiples of kDesiredNumFeaturesFactor. -static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) { +static StatusOr<bool> PadFeaturesDims(HloCustomCallInstruction* conv) { CHECK_EQ(0, conv->shape().tuple_shapes(1).dimensions(0)) << "conv must use 0 scratch bytes, i.e. this pass must be run " "before CudnnConvolutionAlgorithmPicker."; - const auto& target = conv->custom_call_target(); + TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); const auto& dnums = conv->convolution_dimension_numbers(); auto* lhs = conv->mutable_operand(0); auto* rhs = conv->mutable_operand(1); const Shape& result_shape = conv->shape().tuple_shapes(0); Shape new_lhs_shape = [&] { - if (target == kCudnnConvForwardCallTarget || - target == kCudnnConvBackwardFilterCallTarget) { - // LHS is "input". - return PadShape(lhs->shape(), {dnums.input_feature_dimension()}); + switch (kind) { + case CudnnConvKind::kForward: + case CudnnConvKind::kBackwardFilter: + // LHS is "input". + return PadShape(lhs->shape(), {dnums.input_feature_dimension()}); + case CudnnConvKind::kBackwardInput: + // LHS is "output". + return PadShape(lhs->shape(), {dnums.output_feature_dimension()}); + case CudnnConvKind::kForwardActivation: + LOG(FATAL) << "Not yet implemented."; } - CHECK_EQ(target, kCudnnConvBackwardInputCallTarget); - // LHS is "output". - return PadShape(lhs->shape(), {dnums.output_feature_dimension()}); }(); Shape new_rhs_shape = [&] { - if (target == kCudnnConvForwardCallTarget || - target == kCudnnConvBackwardInputCallTarget) { - // RHS is "filter". 
- return PadShape(rhs->shape(), {dnums.kernel_input_feature_dimension(), - dnums.kernel_output_feature_dimension()}); + switch (kind) { + case CudnnConvKind::kForward: + case CudnnConvKind::kBackwardInput: + // RHS is "filter". + return PadShape(rhs->shape(), + {dnums.kernel_input_feature_dimension(), + dnums.kernel_output_feature_dimension()}); + case CudnnConvKind::kBackwardFilter: + // RHS is "output". + return PadShape(rhs->shape(), {dnums.output_feature_dimension()}); + case CudnnConvKind::kForwardActivation: + LOG(FATAL) << "Not yet implemented."; } - CHECK_EQ(target, kCudnnConvBackwardFilterCallTarget); - // RHS is "output". - return PadShape(rhs->shape(), {dnums.output_feature_dimension()}); }(); if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) && @@ -146,18 +153,21 @@ static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) { } Shape new_result_shape = [&] { - if (target == kCudnnConvForwardCallTarget) { - // Result is "output". - return PadShape(result_shape, {dnums.output_feature_dimension()}); + switch (kind) { + case CudnnConvKind::kForward: + // Result is "output". + return PadShape(result_shape, {dnums.output_feature_dimension()}); + case CudnnConvKind::kBackwardInput: + // Result is "input". + return PadShape(result_shape, {dnums.input_feature_dimension()}); + case CudnnConvKind::kBackwardFilter: + // Result is "filter". + return PadShape(result_shape, + {dnums.kernel_input_feature_dimension(), + dnums.kernel_output_feature_dimension()}); + case CudnnConvKind::kForwardActivation: + LOG(FATAL) << "Not yet implemented."; } - if (target == kCudnnConvBackwardInputCallTarget) { - // Result is "input". - return PadShape(result_shape, {dnums.input_feature_dimension()}); - } - CHECK_EQ(target, kCudnnConvBackwardFilterCallTarget); - // Result is "filter". 
- return PadShape(result_shape, {dnums.kernel_input_feature_dimension(), - dnums.kernel_output_feature_dimension()}); }(); // Check that padding wouldn't increase the total bytes read/written by this @@ -223,16 +233,20 @@ static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) { return true; } -static std::vector<HloInstruction*> GetRelevantConvs(HloComputation* comp) { - std::vector<HloInstruction*> convs; +static std::vector<HloCustomCallInstruction*> GetRelevantConvs( + HloComputation* comp) { + std::vector<HloCustomCallInstruction*> convs; for (HloInstruction* instr : comp->instructions()) { - if (IsCustomCallToDnnConvolution(*instr) && - instr->operand(0)->shape().element_type() == F16 && + if (!IsCustomCallToDnnConvolution(*instr)) { + continue; + } + auto* custom_call = Cast<HloCustomCallInstruction>(instr); + if (custom_call->operand(0)->shape().element_type() == F16 && // TODO(timshen): Disable for fused conv for now. Implement it if it's // needed. - Cast<HloCustomCallInstruction>(instr)->custom_call_target() != + custom_call->custom_call_target() != kCudnnConvBiasActivationForwardCallTarget) { - convs.push_back(instr); + convs.push_back(custom_call); } } return convs; @@ -241,7 +255,7 @@ static std::vector<HloInstruction*> GetRelevantConvs(HloComputation* comp) { StatusOr<bool> PadForTensorCores::Run(HloModule* module) { bool changed = false; for (HloComputation* comp : module->MakeNonfusionComputations()) { - for (HloInstruction* conv : GetRelevantConvs(comp)) { + for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) { TF_ASSIGN_OR_RETURN(bool result, PadFeaturesDims(conv)); changed |= result; } diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc index b42a19e3a2..ae7abca7c6 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_creation_utils.h" #include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/util.h" @@ -378,25 +379,25 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution( StatusOr PadInsertion::RunOnComputation(HloComputation* computation) { bool changed = false; - std::vector convs; + std::vector convs; for (auto* instr : computation->instructions()) { if (IsCustomCallToDnnConvolution(*instr)) { - convs.push_back(instr); + convs.push_back(Cast(instr)); } } - for (HloInstruction* instruction : convs) { - const auto& target = instruction->custom_call_target(); - if (target == kCudnnConvForwardCallTarget || - target == kCudnnConvBiasActivationForwardCallTarget) { - changed |= CanonicalizeForwardConvolution(instruction); - } else if (target == kCudnnConvBackwardFilterCallTarget) { - changed |= CanonicalizeBackwardFilterConvolution(instruction); - } else if (target == kCudnnConvBackwardInputCallTarget) { - changed |= CanonicalizeBackwardInputConvolution(instruction); - } else { - LOG(FATAL) << "Unknown custom call target for cudnn conv: " - << instruction->ToString(); - } + for (HloCustomCallInstruction* instruction : convs) { + TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(instruction)); + changed |= [&] { + switch (kind) { + case CudnnConvKind::kForward: + case CudnnConvKind::kForwardActivation: + return CanonicalizeForwardConvolution(instruction); + case CudnnConvKind::kBackwardInput: + return CanonicalizeBackwardInputConvolution(instruction); + case CudnnConvKind::kBackwardFilter: + return CanonicalizeBackwardFilterConvolution(instruction); + } + }(); } return changed; } -- GitLab From ee1cb110360b12d752c9cb4ebbb76d33930f67d7 Mon Sep 17 00:00:00 2001 From: Mark Daoust 
Date: Tue, 9 Oct 2018 17:23:45 -0700 Subject: [PATCH 168/411] Move tflite_convert g3docs, so they will be pulled into the site. PiperOrigin-RevId: 216452447 --- tensorflow/contrib/lite/g3doc/_book.yaml | 9 ++++ .../tflite_convert}/cmdline_examples.md | 54 ++++++++----------- .../tflite_convert}/cmdline_reference.md | 17 ++---- .../lite/g3doc/tflite_convert/index.md | 22 ++++++++ .../tflite_convert}/python_api.md | 29 ++-------- .../tflite_convert}/toco_landscape.svg | 0 tensorflow/contrib/lite/toco/g3doc/README.md | 3 ++ 7 files changed, 63 insertions(+), 71 deletions(-) rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/cmdline_examples.md (90%) rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/cmdline_reference.md (93%) create mode 100644 tensorflow/contrib/lite/g3doc/tflite_convert/index.md rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/python_api.md (89%) rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/toco_landscape.svg (100%) create mode 100644 tensorflow/contrib/lite/toco/g3doc/README.md diff --git a/tensorflow/contrib/lite/g3doc/_book.yaml b/tensorflow/contrib/lite/g3doc/_book.yaml index de6914e536..f6ec387ad2 100644 --- a/tensorflow/contrib/lite/g3doc/_book.yaml +++ b/tensorflow/contrib/lite/g3doc/_book.yaml @@ -38,6 +38,15 @@ upper_tabs: path: /lite/ios - title: TensorFlow Lite for Raspberry Pi path: /lite/rpi + - heading: TFLite Converter + - title: Overview + path: /lite/tflite_convert/ + - title: Python API + path: /lite/tflite_convert/python_api + - title: Command Line Examples + path: /lite/tflite_convert/cmdline_examples + - title: Command Line Reference + path: /lite/tflite_convert/cmdline_reference - title: TF Mobile style: accordion diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md similarity index 90% rename from tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md rename to 
tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md index e3c46eb377..d88acfae80 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md +++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md @@ -1,33 +1,8 @@ # TensorFlow Lite Converter command-line examples -This page shows how to use the TensorFlow Lite Converter in the command line. It -is complemented by the following documents: - -* [README](../README.md) -* [Command-line glossary](cmdline_reference.md) -* [Python API examples](python_api.md) - -Table of contents: - -* [Command-line tools](#tools) - * [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9) -* [Basic examples](#basic) - * [Convert a TensorFlow GraphDef](#graphdef) - * [Convert a TensorFlow SavedModel](#savedmodel) - * [Convert a tf.keras model](#keras) -* [Quantization](#quantization) - * [Convert a TensorFlow GraphDef for quantized inference](#graphdef-quant) - * [Use "dummy-quantization" to try out quantized inference on a float - graph](#dummy-quant) -* [Specifying input and output arrays](#specifying-input-and-output-arrays) - * [Multiple input arrays](#multiple-input-arrays) - * [Multiple output arrays](#multiple-output-arrays) - * [Specifying subgraphs](#specifying-subgraphs) -* [Graph visualizations](#graph-visualizations) - * [Using --output_format=GRAPHVIZ_DOT](#using-output-format-graphviz-dot) - * [Using --dump_graphviz_dir](#using-dump-graphviz-dir) - * [Graph "video" logging](#graph-video-logging) - * [Legend for the graph visualizations](#graphviz-legend) +This page shows how to use the TensorFlow Lite Converter in the command line. 
+ +[TOC] ## Command-line tools @@ -325,10 +300,23 @@ As before, these can be rendered to PDFs: dot -Tpdf -O /tmp/toco_*.dot ``` -Sample output files can be seen here: - -* [toco_AT_IMPORT.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf) -* [toco_AFTER_TRANSFORMATIONS.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf). +Sample output files can be seen here below. Note that it is the same +`AveragePool` node in the top right of each image. + + + + + + +
+ + + + + + + +
beforeafter
### Graph "video" logging @@ -347,7 +335,7 @@ change was introduced in the graph. * Some typically heavy operators (e.g. Conv) are rendered in a darker red. -* Arrays are octogons with the following colors: +* Arrays are octagons with the following colors: * Constant arrays are blue. * Activation arrays are gray: diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md similarity index 93% rename from tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md rename to tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md index 31200fd657..d65912fea6 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md +++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md @@ -2,18 +2,9 @@ This page is complete reference of command-line flags used by the TensorFlow Lite Converter's command line starting from TensorFlow 1.9 up until the most -recent build of TensorFlow. It is complemented by the following other documents: +recent build of TensorFlow. -* [README](../README.md) -* [Command-line examples](cmdline_examples.md) -* [Python API examples](python_api.md) - -Table of contents: - -* [High-level flags](#high-level-flags) -* [Model flags](#model-flags) -* [Transformation flags](#transformation-flags) -* [Logging flags](#logging-flags) +[TOC] ## High-level flags @@ -32,7 +23,7 @@ files. The flag `--output_file` is always required. Additionally, either * `--output_format`. Type: string. Default: `TFLITE`. Specifies the format of the output file. Allowed values: * `TFLITE`: TensorFlow Lite FlatBuffer format. - * `GRAPHVIZ_DOT`: GraphViz `.dot` format containg a visualization of the + * `GRAPHVIZ_DOT`: GraphViz `.dot` format containing a visualization of the graph after graph transformations. * Note that passing `GRAPHVIZ_DOT` to `--output_format` leads to loss of TFLite specific transformations. Therefore, the resulting @@ -68,7 +59,7 @@ based on index. 
* `--input_shapes`. Type: colon-separated list of comma-separated lists of integers. Each comma-separated list of integers gives the shape of one of the input arrays specified in - [TensorFlow convention](https://www.tensorflow.org/versions/r1.2/programmers_guide/dims_types#shape). + [TensorFlow convention](https://www.tensorflow.org/guide/dims_types#shape). * Example: `--input_shapes=1,60,80,3` for a typical vision model means a batch size of 1, an input image height of 60, an input image width of 80, and an input image depth of 3 (representing RGB channels). diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/index.md b/tensorflow/contrib/lite/g3doc/tflite_convert/index.md new file mode 100644 index 0000000000..12ba0225f6 --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/tflite_convert/index.md @@ -0,0 +1,22 @@ +# TensorFlow Lite Converter + +The TensorFlow Lite Converter converts TensorFlow graphs into +TensorFlow Lite graphs. There are additional usages that are also detailed in +the usage documentation. + + +## Where the converter fits in the TensorFlow landscape + +Once an application developer has a trained TensorFlow model, the TensorFlow +Lite Converter will accept +that model and generate a TensorFlow Lite +[FlatBuffer](https://google.github.io/flatbuffers/) file. The converter currently supports +[SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators), +frozen graphs (models generated via +[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)), +and `tf.keras` model files. The TensorFlow Lite FlatBuffer file can be shipped +to client devices, generally mobile devices, where the TensorFlow Lite +interpreter handles it on-device. This flow is represented in the diagram +below.
+ +![drawing](toco_landscape.svg) diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md similarity index 89% rename from tensorflow/contrib/lite/toco/g3doc/python_api.md rename to tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md index 1f741360c6..e1c0e0c240 100644 --- a/tensorflow/contrib/lite/toco/g3doc/python_api.md +++ b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md @@ -1,31 +1,10 @@ # TensorFlow Lite Converter & Interpreter Python API reference This page provides examples on how to use the TensorFlow Lite Converter and the -TensorFlow Lite interpreter using the Python API. It is complemented by the -following documents: - -* [README](../README.md) -* [Command-line examples](cmdline_examples.md) -* [Command-line glossary](cmdline_reference.md) - -Table of contents: - -* [High-level overview](#high-level-overview) -* [API](#api) -* [Basic examples](#basic) - * [Exporting a GraphDef from tf.Session](#basic-graphdef-sess) - * [Exporting a GraphDef from file](#basic-graphdef-file) - * [Exporting a SavedModel](#basic-savedmodel) - * [Exporting a tf.keras File](#basic-keras-file) -* [Complex examples](#complex) - * [Exporting a quantized GraphDef](#complex-quant) -* [TensorFlow Lite Python interpreter](#interpreter) - * [Using the interpreter from a model file](#interpreter-file) - * [Using the interpreter from model data](#interpreter-data) -* [Additional instructions](#additional-instructions) - * [Build from source code](#latest-package) - * [Converting models in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11) - * [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9) +TensorFlow Lite interpreter using the Python API. 
+ +[TOC] + ## High-level overview diff --git a/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg b/tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg similarity index 100% rename from tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg rename to tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg diff --git a/tensorflow/contrib/lite/toco/g3doc/README.md b/tensorflow/contrib/lite/toco/g3doc/README.md new file mode 100644 index 0000000000..2153b6cc63 --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/README.md @@ -0,0 +1,3 @@ +# TOCO + +These files have moved to [../../g3doc/tflite_convert](../../g3doc/tflite_convert) -- GitLab From eaebeb1d4d939fb9fd0b75e32a76151cb517bfb6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 17:24:11 -0700 Subject: [PATCH 169/411] Update ops-related pbtxt files. PiperOrigin-RevId: 216452496 --- tensorflow/core/ops/ops.pbtxt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 05b97bffad..a8da95dea3 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -15116,6 +15116,22 @@ op { } is_stateful: true } +op { + name: "LookupTableRemoveV2" + input_arg { + name: "table_handle" + type: DT_RESOURCE + } + input_arg { + name: "keys" + type_attr: "Tin" + } + attr { + name: "Tin" + type: "type" + } + is_stateful: true +} op { name: "LookupTableSize" input_arg { @@ -17701,6 +17717,10 @@ op { name: "empty_key" type_attr: "key_dtype" } + input_arg { + name: "deleted_key" + type_attr: "key_dtype" + } output_arg { name: "table_handle" type: DT_RESOURCE -- GitLab From f0784e69761ef5b78480e9e8b1fd1aa558186646 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 9 Oct 2018 17:35:41 -0700 Subject: [PATCH 170/411] Add support for modeling fast memory close to the processor/gpu PiperOrigin-RevId: 216453979 --- .../core/grappler/costs/cost_estimator.h | 38 +++++++++- .../grappler/costs/op_level_cost_estimator.cc | 76 +++++++++++++------ .../grappler/costs/op_level_cost_estimator.h | 3 +- .../core/grappler/costs/virtual_scheduler.cc | 31 +++++--- 4 files changed, 112 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h index 569d9da683..811e923b87 100644 --- a/tensorflow/core/grappler/costs/cost_estimator.h +++ b/tensorflow/core/grappler/costs/cost_estimator.h @@ -31,8 +31,37 @@ constexpr int64 kMemoryUnknown = -1ll; constexpr int64 kZeroMemory = 0ll; struct DeviceInfo { - double gigaops; // Billions of operations executed per second. - double gb_per_sec; // Bandwidth to main memory in GB per second. + // Billions of operations executed per second. + double gigaops; + + // Bandwidth to main memory in GB per second. + double gb_per_sec; + + // Read bandwidth to intermediate memory in GB per second. + double intermediate_read_gb_per_sec; + + // Write bandwidth to intermediate memory in GB per second.
+ double intermediate_write_gb_per_sec; + + DeviceInfo() + : gigaops(INFINITY), + gb_per_sec(INFINITY), + intermediate_read_gb_per_sec(INFINITY), + intermediate_write_gb_per_sec(INFINITY) {} + + DeviceInfo(const DeviceInfo& input) + : gigaops(input.gigaops), + gb_per_sec(input.gb_per_sec), + intermediate_read_gb_per_sec(input.intermediate_read_gb_per_sec), + intermediate_write_gb_per_sec(input.intermediate_write_gb_per_sec) {} + + DeviceInfo(double gigaops, double gb_per_sec, + double intermediate_read_gb_per_sec = INFINITY, + double intermediate_write_gb_per_sec = INFINITY) + : gigaops(gigaops), + gb_per_sec(gb_per_sec), + intermediate_read_gb_per_sec(intermediate_read_gb_per_sec), + intermediate_write_gb_per_sec(intermediate_write_gb_per_sec) {} }; // Holds the set of things we might want to estimate or measure in Grappler. @@ -101,6 +130,9 @@ struct Costs { // Memory access cost of running the graph. Duration memory_time; + // Intermediate memory access cost of running the graph + Duration intermediate_memory_time; + // This field can be a very pessimistic estimate of the main memory // requirements of a graph. For example, it might assume that all activations // are live for all of a graph's execution. 
@@ -146,6 +178,7 @@ Costs::Costs() { execution_time = Duration::zero(); compute_time = Duration::zero(); memory_time = Duration::zero(); + intermediate_memory_time = Duration::zero(); max_memory = kMemoryUnknown; persistent_memory = kMemoryUnknown; temporary_memory = kMemoryUnknown; @@ -158,6 +191,7 @@ Costs Costs::ZeroCosts() { costs.execution_time = Duration::zero(); costs.compute_time = Duration::zero(); costs.memory_time = Duration::zero(); + costs.intermediate_memory_time = Duration::zero(); costs.max_memory = kZeroMemory; costs.persistent_memory = kZeroMemory; costs.temporary_memory = kZeroMemory; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index f363f2915f..76e5c989fc 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -420,7 +420,7 @@ DeviceInfo OpLevelCostEstimator::GetDeviceInfo( DCHECK_LT(0, gflops) << device.DebugString(); DCHECK_LT(0, gb_per_sec) << device.DebugString(); - return {gflops, gb_per_sec}; + return DeviceInfo(gflops, gb_per_sec); } Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const { @@ -478,8 +478,8 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost( bool unknown_shapes = false; const double input_size = CalculateInputSize(op_info, &unknown_shapes); const double output_size = CalculateOutputSize(op_info, &unknown_shapes); - const double total_io_bytes = input_size + output_size; - Costs costs = PredictOpCountBasedCost(operations, total_io_bytes, op_info); + Costs costs = + PredictOpCountBasedCost(operations, input_size, output_size, op_info); costs.inaccurate = unknown_shapes; costs.num_ops_with_unknown_shapes = unknown_shapes; costs.max_memory = output_size; @@ -487,9 +487,13 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost( } Costs OpLevelCostEstimator::PredictOpCountBasedCost( - double operations, double total_io_bytes, const 
OpInfo& op_info) const { + double operations, double input_io_bytes, double output_io_bytes, + const OpInfo& op_info) const { + double total_io_bytes = input_io_bytes + output_io_bytes; const DeviceInfo device_info = GetDeviceInfo(op_info.device()); - if (device_info.gigaops <= 0 || device_info.gb_per_sec <= 0) { + if (device_info.gigaops <= 0 || device_info.gb_per_sec <= 0 || + device_info.intermediate_read_gb_per_sec <= 0 || + device_info.intermediate_write_gb_per_sec <= 0) { VLOG(1) << "BAD DEVICE. Op:" << op_info.op() << " device type:" << op_info.device().type() << " device model:" << op_info.device().model(); @@ -504,9 +508,29 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost( VLOG(1) << "Op:" << op_info.op() << " Size (KB):" << (total_io_bytes) / 1e3 << " Memory Time (ns):" << memory_cost.count(); + // Check if bytes > 0. If it's not and the bandwidth is set to infinity + // then the result would be undefined. + double intermediate_read_time = + (input_io_bytes > 0) + ? std::ceil(input_io_bytes / device_info.intermediate_read_gb_per_sec) + : 0; + + double intermediate_write_time = + (output_io_bytes > 0) + ? 
std::ceil(output_io_bytes / + device_info.intermediate_write_gb_per_sec) + : 0; + + Costs::NanoSeconds intermediate_memory_cost(intermediate_read_time + + intermediate_write_time); + VLOG(1) << "Op:" << op_info.op() << " Size (KB):" << (total_io_bytes) / 1e3 + << " Intermediate Memory Time (ns):" + << intermediate_memory_cost.count(); + Costs costs; costs.compute_time = compute_cost; costs.memory_time = memory_cost; + costs.intermediate_memory_time = intermediate_memory_cost; CombineCostsAndUpdateExecutionTime(&costs); return costs; } @@ -1273,8 +1297,8 @@ Costs OpLevelCostEstimator::PredictGatherOrSlice( CalculateTensorElementCount(op_info.inputs(1), &unknown_shapes); } - const double total_io = input_size + output_size; - Costs costs = PredictOpCountBasedCost(op_count, total_io, op_info); + Costs costs = + PredictOpCountBasedCost(op_count, input_size, output_size, op_info); costs.inaccurate = unknown_shapes; costs.num_ops_with_unknown_shapes = unknown_shapes; costs.max_memory = output_size; @@ -1291,12 +1315,15 @@ Costs OpLevelCostEstimator::PredictFusedOp( // operations here; so we simply add the compute times of each component // operation, then update the execution time. 
Costs fused_cost = PredictOpCountBasedCost(0, op_context.op_info); + fused_cost.compute_time = 0; fused_cost.inaccurate = false; for (auto& fused_op : fused_op_contexts) { auto op_cost = PredictCosts(fused_op); + fused_cost.compute_time += op_cost.compute_time; fused_cost.inaccurate |= op_cost.inaccurate; + fused_cost.intermediate_memory_time += op_cost.intermediate_memory_time; } CombineCostsAndUpdateExecutionTime(&fused_cost); @@ -1415,8 +1442,8 @@ Costs OpLevelCostEstimator::PredictMaxPool(const OpContext& op_context) const { const double total_output_size = CalculateOutputSize(op_info, &found_unknown_shapes); - Costs costs = PredictOpCountBasedCost( - ops, total_input_size + total_output_size, op_info); + Costs costs = PredictOpCountBasedCost(ops, total_input_size, + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; @@ -1458,8 +1485,8 @@ Costs OpLevelCostEstimator::PredictMaxPoolGrad( const double total_output_size = CalculateTensorSize(op_info.inputs(0), &found_unknown_shapes); - Costs costs = PredictOpCountBasedCost( - ops, total_input_size + total_output_size, op_info); + Costs costs = PredictOpCountBasedCost(ops, total_input_size, + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; @@ -1491,8 +1518,8 @@ Costs OpLevelCostEstimator::PredictAvgPool(const OpContext& op_context) const { const double total_output_size = CalculateOutputSize(op_info, &found_unknown_shapes); - Costs costs = PredictOpCountBasedCost( - ops, total_input_size + total_output_size, op_info); + Costs costs = PredictOpCountBasedCost(ops, total_input_size, + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; @@ -1544,8 +1571,8 @@ Costs 
OpLevelCostEstimator::PredictAvgPoolGrad( const double total_output_size = CalculateOutputSize(op_info, &found_unknown_shapes); - Costs costs = PredictOpCountBasedCost( - ops, total_input_size + total_output_size, op_info); + Costs costs = PredictOpCountBasedCost(ops, total_input_size, + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; @@ -1590,9 +1617,9 @@ Costs OpLevelCostEstimator::PredictFusedBatchNorm( total_output_size = size_nhwc; } - Costs costs = PredictOpCountBasedCost( - ops, total_input_size + total_output_size + total_internal_read_size, - op_info); + Costs costs = + PredictOpCountBasedCost(ops, total_input_size + total_internal_read_size, + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; @@ -1624,9 +1651,9 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad( double total_internal_read_size = size_nhwc; double total_output_size = size_nhwc * 1 + size_c * 2; - Costs costs = PredictOpCountBasedCost( - ops, total_input_size + total_output_size + total_internal_read_size, - op_info); + Costs costs = + PredictOpCountBasedCost(ops, total_input_size + total_internal_read_size, + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; @@ -1637,9 +1664,12 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad( void OpLevelCostEstimator::CombineCostsAndUpdateExecutionTime( Costs* costs) const { if (compute_memory_overlap_) { - costs->execution_time = std::max(costs->compute_time, costs->memory_time); + costs->execution_time = + std::max(costs->intermediate_memory_time, + std::max(costs->compute_time, costs->memory_time)); } else { - costs->execution_time = costs->compute_time + costs->memory_time; + 
costs->execution_time = costs->compute_time + costs->memory_time + + costs->intermediate_memory_time; } } } // end namespace grappler diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index dd1ee39cb2..84dd9213f7 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -54,7 +54,8 @@ class OpLevelCostEstimator { // Naive cost estimate based on the given operations count and the given total // io size in bytes. Sizes of op_info inputs and outputs are not taken into // consideration. - Costs PredictOpCountBasedCost(double operations, double total_io_bytes, + Costs PredictOpCountBasedCost(double operations, double input_io_bytes, + double output_io_bytes, const OpInfo& op_info) const; // This family of routines counts the number of operations to perform the diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 5b93fb128f..5c5bdad1cb 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -47,6 +47,7 @@ Costs CombineCosts(const Costs& left, const Costs& right) { result.execution_time += right.execution_time; result.compute_time += right.compute_time; result.memory_time += right.memory_time; + result.intermediate_memory_time += right.intermediate_memory_time; result.num_ops_total += right.num_ops_total; if (right.inaccurate) result.inaccurate = true; @@ -825,23 +826,29 @@ Costs VirtualScheduler::Summary() const { VLOG(1) << "Expected execution time: " << graph_costs_.execution_time.count(); VLOG(1) << "Expected compute time: " << graph_costs_.compute_time.count(); VLOG(1) << "Expected memory time: " << graph_costs_.memory_time.count(); + VLOG(1) << "Expected intermediate memory time: " + << graph_costs_.intermediate_memory_time.count(); VLOG(1) << "Expected max memory: " << 
graph_costs_.max_memory; VLOG(1) << "Expected max per-op buffers: " << graph_costs_.max_per_op_buffers; VLOG(1) << "Expected max per-op streaming buffers: " << graph_costs_.max_per_op_streaming; - VLOG(1) << "Per-op execution time / compute time / memory time:"; + VLOG(1) << "Per-op execution time / compute time / memory time" + << " / intermediate memory time:"; for (const auto& op_cost_pair : op_to_cost_) { const auto& op = op_cost_pair.first; const auto& cost = op_cost_pair.second.execution_time.count(); const auto& compute_cost = op_cost_pair.second.compute_time.count(); const auto& memory_cost = op_cost_pair.second.memory_time.count(); + const auto& intermediate_memory_cost = + op_cost_pair.second.intermediate_memory_time.count(); const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate; if (cost) { // Skip printing out zero-cost ops. VLOG(1) << strings::Printf( - " + %30s : %c %10lld / %10lld / %10lld", op.c_str(), + " + %30s : %c %10lld / %10lld / %10lld / %10lld", op.c_str(), (is_op_cost_accurate ? ' ' : '~'), static_cast(cost), - static_cast(compute_cost), static_cast(memory_cost)); + static_cast(compute_cost), static_cast(memory_cost), + static_cast(intermediate_memory_cost)); } } @@ -894,7 +901,8 @@ Costs VirtualScheduler::Summary() const { << " having unknown shapes"; VLOG(1) << "Per-op execution time / compute time / memory time " - "(and memory usage at peak memory usage):"; + << " / intermediate memory time" + << " (and memory usage at peak memory usage):"; // Profile non-persistent op memory usage. 
for (const auto& node_port : state.mem_usage_snapshot_at_peak) { @@ -910,6 +918,8 @@ Costs VirtualScheduler::Summary() const { const auto& cost = op_cost_pair.second.execution_time.count(); const auto& compute_cost = op_cost_pair.second.compute_time.count(); const auto& memory_cost = op_cost_pair.second.memory_time.count(); + const auto& intermediate_memory_cost = + op_cost_pair.second.intermediate_memory_time.count(); total_compute_time_ns += op_cost_pair.second.execution_time; const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate; if (!is_op_cost_accurate) { @@ -927,12 +937,13 @@ Costs VirtualScheduler::Summary() const { : 0.0; if (cost || mem_usage_percent > 1.0) { // Print out only non-zero cost ops or ops with > 1% memory usage. - VLOG(1) << strings::Printf(" + %30s : %c %10lld / %10lld / %10lld", - op.c_str(), - (is_op_cost_accurate ? ' ' : '~'), - static_cast(cost), - static_cast(compute_cost), - static_cast(memory_cost)) + VLOG(1) << strings::Printf( + " + %30s : %c %10lld / %10lld / %10lld / %10lld", + op.c_str(), (is_op_cost_accurate ? ' ' : '~'), + static_cast(cost), + static_cast(compute_cost), + static_cast(memory_cost), + static_cast(intermediate_memory_cost)) << " (" << strings::HumanReadableNumBytes(op_mem_usage) << " [" << mem_usage_percent << "%] " << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")"); -- GitLab From 75ee5ee51314feef5654ef315960c26d27d657a5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 17:46:22 -0700 Subject: [PATCH 171/411] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 216455250 --- tensorflow/go/op/wrappers.go | 111 ++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index f35117084a..c6ecd75587 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -4562,6 +4562,59 @@ func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Out return decoded_indices, decoded_values, decoded_shape, log_probability } +// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder. +type CTCGreedyDecoderAttr func(optionalAttr) + +// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value. +// +// value: If True, merge repeated classes in output. +// If not specified, defaults to false +func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr { + return func(m optionalAttr) { + m["merge_repeated"] = value + } +} + +// Performs greedy decoding on the logits given in inputs. +// +// A note about the attribute merge_repeated: if enabled, when +// consecutive logits' maximum indices are the same, only the first of +// these is emitted. Labeling the blank '*', the sequence "A B B * B B" +// becomes "A B B" if merge_repeated = True and "A B B B B" if +// merge_repeated = False. +// +// Regardless of the value of merge_repeated, if the maximum index of a given +// time and batch corresponds to the blank, index `(num_classes - 1)`, no new +// element is emitted. +// +// Arguments: +// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. +// sequence_length: A vector containing sequence lengths, size `(batch_size)`. +// +// Returns Indices matrix, size `(total_decoded_outputs x 2)`, +// of a `SparseTensor`. The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`, +// of a `SparseTensor`. The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor. 
+// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence +// log-probabilities. +func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "CTCGreedyDecoder", + Input: []tf.Input{ + inputs, sequence_length, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) +} + // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign. type ResourceStridedSliceAssignAttr func(optionalAttr) @@ -18904,10 +18957,11 @@ func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2 // Arguments: // empty_key: The key used to represent empty key buckets internally. Must not // be used in insert or lookup operations. +// // value_dtype: Type of the table values. // // Returns Handle to a table. 
-func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) { +func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, deleted_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } @@ -18918,7 +18972,7 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D opspec := tf.OpSpec{ Type: "MutableDenseHashTableV2", Input: []tf.Input{ - empty_key, + empty_key, deleted_key, }, Attrs: attrs, } @@ -33104,56 +33158,3 @@ func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_va op := scope.AddOperation(opspec) return op.Output(0), op.Output(1) } - -// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder. -type CTCGreedyDecoderAttr func(optionalAttr) - -// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value. -// -// value: If True, merge repeated classes in output. -// If not specified, defaults to false -func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr { - return func(m optionalAttr) { - m["merge_repeated"] = value - } -} - -// Performs greedy decoding on the logits given in inputs. -// -// A note about the attribute merge_repeated: if enabled, when -// consecutive logits' maximum indices are the same, only the first of -// these is emitted. Labeling the blank '*', the sequence "A B B * B B" -// becomes "A B B" if merge_repeated = True and "A B B B B" if -// merge_repeated = False. -// -// Regardless of the value of merge_repeated, if the maximum index of a given -// time and batch corresponds to the blank, index `(num_classes - 1)`, no new -// element is emitted. -// -// Arguments: -// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -// sequence_length: A vector containing sequence lengths, size `(batch_size)`. 
-// -// Returns Indices matrix, size `(total_decoded_outputs x 2)`, -// of a `SparseTensor`. The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`, -// of a `SparseTensor`. The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor. -// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence -// log-probabilities. -func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CTCGreedyDecoder", - Input: []tf.Input{ - inputs, sequence_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} -- GitLab From a8cc3cbdeb1563c05d75043c9901135f8b9be65a Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Tue, 9 Oct 2018 17:50:47 -0700 Subject: [PATCH 172/411] Fix lite/kernels:add_test for Clang 8.0.0 PiperOrigin-RevId: 216455772 --- tensorflow/contrib/lite/kernels/add_test.cc | 36 ++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/add_test.cc b/tensorflow/contrib/lite/kernels/add_test.cc index 0b58443211..261dd36ef0 100644 --- a/tensorflow/contrib/lite/kernels/add_test.cc +++ b/tensorflow/contrib/lite/kernels/add_test.cc @@ -108,7 +108,7 @@ TEST(FloatAddOpModel, ActivationRELU_N1_TO_1) { } TEST(FloatAddOpModel, VariousInputShapes) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]}, @@ -124,7 +124,7 @@ TEST(FloatAddOpModel, VariousInputShapes) { } TEST(FloatAddOpModel, 
WithBroadcast) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]}, @@ -161,7 +161,7 @@ TEST(IntegerAddOpModel, ActivationRELU_N1_TO_1) { } TEST(IntegerAddOpModel, VariousInputShapes) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { IntegerAddOpModel m({TensorType_INT32, test_shapes[i]}, @@ -176,7 +176,7 @@ TEST(IntegerAddOpModel, VariousInputShapes) { } TEST(IntegerAddOpModel, WithBroadcast) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { IntegerAddOpModel m({TensorType_INT32, test_shapes[i]}, @@ -193,11 +193,11 @@ TEST(IntegerAddOpModel, WithBroadcast) { TEST(QuantizedAddOpModel, QuantizedTestsNoActivation) { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - std::vector> inputs1 = { + std::vector> inputs1 = { {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; - std::vector> inputs2 = { + std::vector> inputs2 = { {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; - std::vector> results = { + std::vector> results = { {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; for (int i = 0; i < inputs1.size(); ++i) { QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, @@ -217,11 +217,11 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = 32767.f / 32768.f; float kQuantizedTolerance = GetToleranceInt16(kMin, kMax); - std::vector> inputs1 = { + std::vector> inputs1 = { {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; - std::vector> inputs2 = { + std::vector> inputs2 = { {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; - std::vector> results = { + 
std::vector> results = { {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; for (int i = 0; i < inputs1.size(); ++i) { QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, @@ -240,12 +240,12 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) { TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, - {-0.8, 0.2, 0.7, 0.3}}; - std::vector> inputs2 = {{0.6, 0.4, 0.9, -0.8}, - {0.6, 0.4, -0.8, 0.5}}; - std::vector> results = {{-0.2, 0.6, 1.0, -0.1}, - {-0.2, 0.6, -0.1, 0.8}}; + std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, + {-0.8, 0.2, 0.7, 0.3}}; + std::vector> inputs2 = {{0.6, 0.4, 0.9, -0.8}, + {0.6, 0.4, -0.8, 0.5}}; + std::vector> results = {{-0.2, 0.6, 1.0, -0.1}, + {-0.2, 0.6, -0.1, 0.8}}; for (int i = 0; i < inputs1.size(); ++i) { QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, @@ -262,7 +262,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) { TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, @@ -281,7 +281,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) { TEST(QuantizedAddOpModel, QuantizedWithBroadcast) { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, -- GitLab From 2db20be49c660a0c475cb57fe0935791d66433ed Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 9 Oct 2018 17:59:06 -0700 Subject: [PATCH 173/411] Enable 
support for lambda functions in static analyses. The CFG treats lambdas as ordinary expressions. The activity analysis ensures that variables masked by the lambda's arguments are not being tracked. Note: lambdas do not allow direct modification (we exclude indirect mutation via function or methods). PiperOrigin-RevId: 216456682 --- tensorflow/python/autograph/pyct/cfg.py | 4 -- tensorflow/python/autograph/pyct/cfg_test.py | 16 +++++++ .../pyct/static_analysis/activity.py | 44 ++++++++++++++++--- .../pyct/static_analysis/activity_test.py | 34 ++++++++++++++ 4 files changed, 89 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py index ec733ea38f..fdfcd4dcc1 100644 --- a/tensorflow/python/autograph/pyct/cfg.py +++ b/tensorflow/python/autograph/pyct/cfg.py @@ -679,10 +679,6 @@ class AstToCfg(gast.NodeVisitor): self.cfgs[node] = self.builder.build() self.builder = self.builder_stack.pop() - def visit_Lambda(self, node): - # TODO(mdan): Treat like FunctionDef? That would be a separate CFG. 
- raise NotImplementedError() - def visit_Return(self, node): self._process_exit_statement(node, gast.FunctionDef) diff --git a/tensorflow/python/autograph/pyct/cfg_test.py b/tensorflow/python/autograph/pyct/cfg_test.py index bd82e70f7d..d5870124bc 100644 --- a/tensorflow/python/autograph/pyct/cfg_test.py +++ b/tensorflow/python/autograph/pyct/cfg_test.py @@ -964,6 +964,22 @@ class AstToCfgTest(test.TestCase): ), ) + def test_lambda_basic(self): + + def test_fn(a): + a = lambda b: a + b + return a + + graph, = self._build_cfg(test_fn).values() + + self.assertGraphMatches( + graph, + ( + ('a', 'a = lambda b: a + b', 'return a'), + ('a = lambda b: a + b', 'return a', None), + ), + ) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py index cc159031ff..0ce410d522 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py @@ -146,8 +146,15 @@ class ActivityAnalyzer(transformer.Base): def __init__(self, context, parent_scope=None, add_unknown_symbols=False): super(ActivityAnalyzer, self).__init__(context) self.scope = Scope(parent_scope, None, add_unknown_symbols) + + # Note: all these flags crucially rely on the respective nodes are + # leaves in the AST, that is, they cannot contain other statements. self._in_return_statement = False self._in_aug_assign = False + self._in_lambda = False + self._in_function_def_args = False + + self._untracked_symbols = None @property def _in_constructor(self): @@ -172,6 +179,13 @@ class ActivityAnalyzer(transformer.Base): return qn = anno.getanno(node, anno.Basic.QN) + # Ignore any untracked symbols. 
+ if self._untracked_symbols: + if qn in self._untracked_symbols: + return + if qn.owner_set & set(self._untracked_symbols): + return + if isinstance(node.ctx, gast.Store): self.scope.mark_modified(qn) if qn.is_composite and composite_writes_alter_parent: @@ -181,12 +195,20 @@ class ActivityAnalyzer(transformer.Base): elif isinstance(node.ctx, gast.Load): self.scope.mark_read(qn) elif isinstance(node.ctx, gast.Param): - # Param contexts appear in function defs, so they have the meaning of - # defining a variable. - self.scope.mark_modified(qn) - self.scope.mark_param(qn, self.enclosing_entities[-1]) + if self._in_function_def_args: + # In function defs have the meaning of defining a variable. + self.scope.mark_modified(qn) + self.scope.mark_param(qn, self.enclosing_entities[-1]) + elif self._in_lambda: + assert isinstance(self._untracked_symbols, set) + self._untracked_symbols.add(qn) + else: + # TODO(mdan): Is this case even possible? + raise NotImplementedError( + 'Param "{}" outside a function arguments or lambda.'.format(qn)) else: - raise ValueError('Unknown context %s for node %s.' % (type(node.ctx), qn)) + raise ValueError('Unknown context {} for node "{}".'.format( + type(node.ctx), qn)) if self._in_return_statement: self.scope.mark_returned(qn) @@ -294,6 +316,15 @@ class ActivityAnalyzer(transformer.Base): self.scope.merge_from(after_child) return parent + def visit_Lambda(self, node): + assert not self._in_lambda or self._in_function_def_args + self._in_lambda = True + self._untracked_symbols = set() + node = self.generic_visit(node) + self._untracked_symbols = None + self._in_lambda = False + return node + def visit_arguments(self, node): return self._process_statement(node) @@ -308,7 +339,10 @@ class ActivityAnalyzer(transformer.Base): # A separate Scope tracks the actual function definition. 
self._enter_scope(True) + assert not self._in_function_def_args + self._in_function_def_args = True node.args = self.visit(node.args) + self._in_function_def_args = False # Track the body separately. This is for compatibility reasons, it may not # be strictly needed. diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py index 9a4f1bf09b..678199970c 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py @@ -427,6 +427,40 @@ class ActivityAnalyzerTest(test.TestCase): args_scope = anno.getanno(fn_node.args, anno.Static.SCOPE) self.assertSymbolSetsAre(('a', 'b'), args_scope.params.keys(), 'params') + def test_lambda_captures_reads(self): + + def test_fn(a, b): + return lambda: a + b + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node.body[0] + body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE) + self.assertScopeIs(body_scope, ('a', 'b'), ()) + # Nothing local to the lambda is tracked. 
+ self.assertSymbolSetsAre((), body_scope.params.keys(), 'params') + + def test_lambda_params_are_isolated(self): + + def test_fn(a, b): # pylint: disable=unused-argument + return lambda a: a + b + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node.body[0] + body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE) + self.assertScopeIs(body_scope, ('b',), ()) + self.assertSymbolSetsAre((), body_scope.params.keys(), 'params') + + def test_lambda_complex(self): + + def test_fn(a, b, c, d): # pylint: disable=unused-argument + a = (lambda a, b, c: a + b + c)(d, 1, 2) + b + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node.body[0] + body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE) + self.assertScopeIs(body_scope, ('b', 'd'), ('a',)) + self.assertSymbolSetsAre((), body_scope.params.keys(), 'params') + if __name__ == '__main__': test.main() -- GitLab From 48b24214dd5da842bd00414b46f3e46319c777ee Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 9 Oct 2018 18:47:55 -0700 Subject: [PATCH 174/411] Update model in keras dist strat learning phase test to return consistent values. PiperOrigin-RevId: 216461637 --- .../contrib/distribute/python/keras_test.py | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py index 3511b7761f..6553642ad3 100644 --- a/tensorflow/contrib/distribute/python/keras_test.py +++ b/tensorflow/contrib/distribute/python/keras_test.py @@ -592,33 +592,37 @@ class TestDistributionStrategyWithDatasets(test.TestCase, # meaningful values. Currently we don't pass the learning phase if the # Lambda layer uses the learning phase. 
with self.cached_session(): - x = keras.layers.Input(shape=(16,), name='input') - y = keras.layers.Dense(16)(x) + x = keras.layers.Input(shape=(1,), name='input') + y = keras.layers.Dense(1, kernel_initializer='ones')(x) z = keras.layers.Dropout(0.9999)(y) model = keras.Model(x, z) + initial_weights = model.get_weights() optimizer = gradient_descent.GradientDescentOptimizer(0.005) loss = 'mse' metrics = ['acc'] - strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', - '/device:CPU:0']) + strategy = mirrored_strategy.MirroredStrategy( + ['/device:GPU:0', '/device:GPU:1']) model.compile(optimizer, loss, metrics=metrics, distribute=strategy) - inputs = np.random.rand(10, 16) - targets = np.ones((10, 16), dtype=np.float32) + inputs = np.ones((10, 1), dtype=np.float32) + targets = np.ones((10, 1), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(8) - - hist = model.fit(dataset, epochs=5, steps_per_epoch=20, verbose=1) - self.assertEqual(hist.history['acc'][0], 1) + dataset = dataset.repeat().batch(8) + hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1) + self.assertAlmostEqual(hist.history['acc'][0], 0, 0) + model.set_weights(initial_weights) evaluate_output = model.evaluate(dataset, steps=20) - self.assertEqual(evaluate_output[1], 0) - - predict_output = model.predict(dataset, steps=1) - self.assertNotEqual(np.mean(predict_output), 0) + self.assertAlmostEqual(evaluate_output[1], 1, 0) + + inputs = np.ones((10, 1), dtype=np.float32) + predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs) + predict_dataset = predict_dataset.repeat().batch(5) + output = model.predict(predict_dataset, steps=10) + ref_output = np.ones((50, 1), dtype=np.float32) + self.assertArrayNear(output[0], ref_output, 1e-1) class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase): -- GitLab From 9369994b4b2c4fe822d67a9f65384532cc09c99d Mon Sep 17 
00:00:00 2001 From: Smit Hinsu Date: Tue, 9 Oct 2018 19:06:55 -0700 Subject: [PATCH 175/411] Automated rollback of commit d78c747e9177fc93d43a580acef2b62eb1420859 PiperOrigin-RevId: 216463443 --- tensorflow/contrib/lite/python/BUILD | 2 ++ tensorflow/contrib/lite/python/lite_test.py | 14 +++----------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index be6c44d306..916788f215 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -73,6 +73,7 @@ py_test( data = ["@tflite_mobilenet_ssd_quant_protobuf//:tflite_graph.pb"], srcs_version = "PY2AND3", tags = [ + "no_oss", "no_windows", ], deps = [ @@ -171,6 +172,7 @@ py_test( srcs = ["convert_saved_model_test.py"], srcs_version = "PY2AND3", tags = [ + "no_oss", "no_windows", ], visibility = ["//visibility:public"], diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py index ef9bbded2a..d243a494f6 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -591,19 +591,11 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase): 'Unable to parse input file \'{}\'.'.format(graph_def_file), str(error.exception)) + # TODO(nupurgarg): Test model loading in open source. def _initObjectDetectionArgs(self): # Initializes the arguments required for the object detection model. - # Looks for the model file which is saved in a different location interally - # and externally. 
- filename = resource_loader.get_path_to_datafile('testdata/tflite_graph.pb') - if not os.path.exists(filename): - filename = os.path.join( - resource_loader.get_root_dir_with_all_resources(), - '../tflite_mobilenet_ssd_quant_protobuf/tflite_graph.pb') - if not os.path.exists(filename): - raise IOError("File '{0}' does not exist.".format(filename)) - - self._graph_def_file = filename + self._graph_def_file = resource_loader.get_path_to_datafile( + 'testdata/tflite_graph.pb') self._input_arrays = ['normalized_input_image_tensor'] self._output_arrays = [ 'TFLite_Detection_PostProcess', 'TFLite_Detection_PostProcess:1', -- GitLab From 93eef55c4d04af24a6c8080f34629db179634f07 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 19:07:52 -0700 Subject: [PATCH 176/411] Automated rollback of commit 9bd459e4ceba14f9bb1af98d52a109325de952e8 PiperOrigin-RevId: 216463491 --- .../contrib/lite/experimental/objc/BUILD | 94 ---- .../contrib/lite/experimental/objc/README.md | 10 - .../Configs/TensorFlowLiteObjc.tulsigen | 60 --- .../project.tulsiconf | 17 - .../experimental/objc/apis/TFLInterpreter.h | 188 -------- .../objc/apis/TFLInterpreterOptions.h | 37 -- .../objc/apis/TFLQuantizationParameters.h | 36 -- .../lite/experimental/objc/apis/TFLTensor.h | 77 --- .../experimental/objc/sources/TFLErrorUtil.h | 51 -- .../experimental/objc/sources/TFLErrorUtil.m | 45 -- .../objc/sources/TFLInterpreter.mm | 440 ------------------ .../objc/sources/TFLInterpreterOptions.m | 30 -- .../objc/sources/TFLQuantizationParameters.m | 23 - .../objc/sources/TFLTensor+Internal.h | 42 -- .../experimental/objc/sources/TFLTensor.m | 54 --- .../objc/tests/TFLInterpreterOptionsTests.m | 49 -- .../objc/tests/TFLInterpreterTests.m | 266 ----------- .../tools/pip_package/pip_smoke_test.py | 1 - 18 files changed, 1520 deletions(-) delete mode 100644 tensorflow/contrib/lite/experimental/objc/BUILD delete mode 100644 tensorflow/contrib/lite/experimental/objc/README.md delete mode 
100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen delete mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m delete mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m delete mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m diff --git a/tensorflow/contrib/lite/experimental/objc/BUILD b/tensorflow/contrib/lite/experimental/objc/BUILD deleted file mode 100644 index 236b96adb5..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/BUILD +++ /dev/null @@ -1,94 +0,0 @@ -# TensorFlow Lite Objective-C API. 
- -package(default_visibility = ["//visibility:private"]) - -licenses(["notice"]) # Apache 2.0 - -load("//tools/build_defs/apple:ios.bzl", "ios_unit_test") - -SOURCES = glob([ - "sources/*.h", - "sources/*.m", - "sources/*.mm", -]) - -API_HEADERS = glob([ - "apis/*.h", -]) - -MINIMUM_OS_VERSION = "8.0" - -# Compiler flags for building regular non-test libraries. -RELEASE_COPTS = [ - # Enables language-specific warnings for Objective-C, Objective-C++, C, and C++. - "-Wall", - # Warns if functions, variables, and types marked with the deprecated attribute are being used. - "-Wdeprecated-declarations", - # Warns for errors in documentation. - "-Wdocumentation", - # Turns all warnings into errors. - "-Werror", - # Enables extra warning flags that are not enabled by -Wall. - "-Wextra", - # Warns if a global function is defined without a previous prototype declaration. - "-Wmissing-prototypes", - # From -Wextra. Disables warning when signed value is converted to unsigned value during comparison. - "-Wno-sign-compare", - # From -Wextra. Disables warning for unused parameters, which are common in delegate methods and block callbacks. - "-Wno-unused-parameter", - # Warns if a global or local variable or type declaration shadows another variable, parameter, type, class member, or instance variable. - "-Wshadow", - # Warns if a function is declared or defined without specifying the argument types. For a block with no args, use (void) instead of (). - "-Wstrict-prototypes", - # Warns if an @selector() expression is encountered with a method name that hasn't been defined yet. - "-Wundeclared-selector", - - # Turn off warnings for headers not part of TensorFlow Lite Objective-C API. - "--system-header-prefix=third_party/tensorflow/contrib/lite/experimental/c/", -] - -# Compiler flags for building test libraries. -TEST_COPTS = RELEASE_COPTS + [ - # From -Wall. Disables warning when passing nil to a callee that requires a non-null argument. 
- "-Wno-nonnull", - # Disables warning when a global or local variable or type declaration shadows another. - "-Wno-shadow", -] - -objc_library( - name = "TensorFlowLiteObjCLib", - srcs = SOURCES, - hdrs = API_HEADERS, - copts = RELEASE_COPTS, - deps = [ - "//tensorflow/contrib/lite/experimental/c:c_api", - ], - alwayslink = 1, -) - -ios_unit_test( - name = "TensorFlowLiteObjCTests", - size = "small", - minimum_os_version = MINIMUM_OS_VERSION, - deps = [":TensorFlowLiteObjCTestLib"], -) - -objc_library( - name = "TensorFlowLiteObjCTestLib", - testonly = 1, - srcs = glob([ - "tests/*.m", - ]), - hdrs = glob([ - "apis/*.h", - "sources/*.h", - "tests/*.h", - ]), - copts = TEST_COPTS, - resources = [ - "//tensorflow/contrib/lite:testdata/add.bin", - ], - deps = [ - ":TensorFlowLiteObjCLib", - ], -) diff --git a/tensorflow/contrib/lite/experimental/objc/README.md b/tensorflow/contrib/lite/experimental/objc/README.md deleted file mode 100644 index e8f150b1e8..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# TensorFlow Lite Objective-C API - -## TensorFlowLiteObjc Tulsi Project - -Open the `TensorFlowLiteObjc.tulsiproj` using the Tulsi application on Mac or by -running the following command in Terminal from the root source directory: - -```shell -generate_xcodeproj.sh --genconfig tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj:TensorFlowLiteObjC --outputfolder ~/path/to/xcodeproj -``` diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen deleted file mode 100644 index babb5902d3..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen +++ /dev/null @@ -1,60 +0,0 @@ -{ - "sourceFilters" : [ - "third_party/tensorflow/contrib/lite", - 
"third_party/tensorflow/contrib/lite/experimental/c", - "third_party/tensorflow/contrib/lite/experimental/objc", - "third_party/tensorflow/contrib/lite/experimental/objc/apis", - "third_party/tensorflow/contrib/lite/experimental/objc/sources", - "third_party/tensorflow/contrib/lite/experimental/objc/tests", - "third_party/tensorflow/contrib/lite/kernels", - "third_party/tensorflow/contrib/lite/kernels/internal", - "third_party/tensorflow/contrib/lite/nnapi", - "third_party/tensorflow/contrib/lite/schema", - ], - "buildTargets" : [ - "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCLib", - "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCTests", - ], - "projectName" : "TensorFlowLiteObjC", - "optionSet" : { - "LaunchActionPreActionScript" : { - "p" : "$(inherited)" - }, - "BazelBuildStartupOptionsRelease" : { - "p" : "$(inherited)" - }, - "BazelBuildOptionsRelease" : { - "p" : "$(inherited)" - }, - "BazelBuildOptionsDebug" : { - "p" : "$(inherited)" - }, - "EnvironmentVariables" : { - "p" : "$(inherited)" - }, - "BuildActionPreActionScript" : { - "p" : "$(inherited)" - }, - "CommandlineArguments" : { - "p" : "$(inherited)" - }, - "TestActionPreActionScript" : { - "p" : "$(inherited)" - }, - "BazelBuildStartupOptionsDebug" : { - "p" : "$(inherited)" - }, - "BuildActionPostActionScript" : { - "p" : "$(inherited)" - }, - "TestActionPostActionScript" : { - "p" : "$(inherited)" - }, - "LaunchActionPostActionScript" : { - "p" : "$(inherited)" - } - }, - "additionalFilePaths" : [ - "third_party/tensorflow/contrib/lite/experimental/objc/BUILD", - ] -} diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf deleted file mode 100644 index 00299cd4cf..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf +++ /dev/null @@ -1,17 +0,0 @@ -{ - 
"configDefaults" : { - "optionSet" : { - "BazelBuildOptionsDebug" : { - "p" : "--ios_minimum_os=8.0" - }, - "BazelBuildOptionsRelease" : { - "p" : "--ios_minimum_os=8.0" - }, - } - }, - "projectName" : "TensorFlowLiteObjC", - "packages" : [ - "third_party/tensorflow/contrib/lite/experimental/objc" - ], - "workspaceRoot" : "../../../../../../.." -} diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h deleted file mode 100644 index c07ffc06ff..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@class TFLInterpreterOptions; -@class TFLTensor; - -NS_ASSUME_NONNULL_BEGIN - -/** - * @enum TFLInterpreterErrorCode - * This enum specifies various error codes related to `TFLInterpreter`. - */ -typedef NS_ENUM(NSUInteger, TFLInterpreterErrorCode) { - /** Provided tensor index is invalid. */ - TFLInterpreterErrorCodeInvalidTensorIndex, - - /** Input data has invalid byte size. */ - TFLInterpreterErrorCodeInvalidInputByteSize, - - /** Provided shape is invalid. It must be a non-empty array of positive unsigned integers. */ - TFLInterpreterErrorCodeInvalidShape, - - /** Provided model cannot be loaded. */ - TFLInterpreterErrorCodeFailedToLoadModel, - - /** Failed to create `TFLInterpreter`. 
*/ - TFLInterpreterErrorCodeFailedToCreateInterpreter, - - /** Failed to invoke `TFLInterpreter`. */ - TFLInterpreterErrorCodeFailedToInvoke, - - /** Failed to retrieve a tensor. */ - TFLInterpreterErrorCodeFailedToGetTensor, - - /** Failed to resize an input tensor. */ - TFLInterpreterErrorCodeFailedToResizeInputTensor, - - /** Failed to copy data into an input tensor. */ - TFLInterpreterErrorCodeFailedToCopyDataToInputTensor, - - /** Failed to get data from an output tensor. */ - TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor, - - /** Failed to allocate memory for tensors. */ - TFLInterpreterErrorCodeFailedToAllocateTensors, - - /** Operaton not allowed without allocating memory for tensors first. */ - TFLInterpreterErrorCodeAllocateTensorsRequired, - - /** Operaton not allowed without invoking the interpreter first. */ - TFLInterpreterErrorCodeInvokeInterpreterRequired, -}; - -/** - * A TensorFlow Lite model interpreter. - */ -@interface TFLInterpreter : NSObject - -/** The total number of input tensors. 0 if the interpreter creation failed. */ -@property(nonatomic, readonly) NSUInteger inputTensorCount; - -/** The total number of output tensors. 0 if the interpreter creation failed. */ -@property(nonatomic, readonly) NSUInteger outputTensorCount; - -/** Unavailable. */ -- (instancetype)init NS_UNAVAILABLE; - -/** - * Initializes a new TensorFlow Lite interpreter instance with the given model file path and the - * default interpreter options. - * - * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. - * - * @return A new instance of `TFLInterpreter` with the given model and the default interpreter - * options. - */ -- (instancetype)initWithModelPath:(NSString *)modelPath; - -/** - * Initializes a new TensorFlow Lite interpreter instance with the given model file path and - * options. - * - * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. 
- * @param options Options to use for configuring the TensorFlow Lite interpreter. - * - * @return A new instance of `TFLInterpreter` with the given model and options. - */ -- (instancetype)initWithModelPath:(NSString *)modelPath - options:(TFLInterpreterOptions *)options NS_DESIGNATED_INITIALIZER; - -/** - * Invokes the interpreter to run inference. - * - * @param error An optional error parameter populated when there is an error in invoking the - * interpreter. - * - * @return Whether the invocation is successful. Returns NO if an error occurred. - */ -- (BOOL)invokeWithError:(NSError **)error; - -/** - * Returns the input tensor at the given index. - * - * @param index The index of an input tensor. - * @param error An optional error parameter populated when there is an error in looking up the input - * tensor. - * - * @return The input tensor at the given index. `nil` if there is an error. - */ -- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error; - -/** - * Returns the output tensor at the given index. - * - * @param index The index of an output tensor. - * @param error An optional error parameter populated when there is an error in looking up the - * output tensor. - * - * @return The output tensor at the given index. `nil` if there is an error. - */ -- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error; - -/** - * Resizes the input tensor at the given index to the specified shape (an array of positive unsigned - * integers). - * - * @param index The index of an input tensor. - * @param shape Shape that the given input tensor should be resized to. It should be an array of - * positive unsigned integer(s) containing the size of each dimension. - * @param error An optional error parameter populated when there is an error in resizing the input - * tensor. - * - * @return Whether the input tensor was resized successfully. Returns NO if an error occurred. 
- */ -- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index - toShape:(NSArray *)shape - error:(NSError **)error; - -/** - * Copies the given data into the input tensor at the given index. This is allowed only before the - * interpreter is invoked. - * - * @param data The data to set. The byte size of the data must match what's required by the given - * input tensor. - * @param index The index of an input tensor. - * @param error An optional error parameter populated when there is an error in setting the data. - * - * @return Whether the data was set into the input tensor successfully. Returns NO if an error - * occurred. - */ -- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error; - -/** - * Gets the data from the output tensor at the given index. The interpreter invocation has to - * complete before the data can be retrieved from an output tensor. - * - * @param index The index of an output tensor. - * @param error An optional error parameter populated when there is an error in getting the data. - * - * @return The data of the output tensor at the given index. `nil` if there is an error. - */ -- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error; - -/** - * Allocates memory for tensors. - * - * @param error An optional error parameter populated when there is an error in allocating memory. - * - * @return Whether memory allocation is successful. Returns NO if an error occurred. - */ -- (BOOL)allocateTensorsWithError:(NSError **)error; - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h deleted file mode 100644 index 6461fbf017..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -NS_ASSUME_NONNULL_BEGIN - -/** Custom configuration options for a TensorFlow Lite interpreter. */ -@interface TFLInterpreterOptions : NSObject - -/** - * Maximum number of threads that the interpreter should run on. Defaults to 0 (unspecified, letting - * TensorFlow Lite to optimize the threading decision). - */ -@property(nonatomic) NSUInteger numberOfThreads; - -/** - * Initializes a new instance of `TFLInterpreterOptions`. - * - * @return A new instance of `TFLInterpreterOptions`. - */ -- (instancetype)init NS_DESIGNATED_INITIALIZER; - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h deleted file mode 100644 index 3d5cf793c5..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -NS_ASSUME_NONNULL_BEGIN - -/** - * Parameters for asymmetric quantization. Quantized values can be converted to float values using: - * `realValue = scale * (quantizedValue - zeroPoint)`. - */ -@interface TFLQuantizationParameters : NSObject - -/** Scale of asymmetric quantization. */ -@property(nonatomic, readonly) float scale; - -/** Zero point of asymmetric quantization. */ -@property(nonatomic, readonly) int32_t zeroPoint; - -/** Unavailable. */ -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h deleted file mode 100644 index d08b8fc0e9..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@class TFLQuantizationParameters; - -NS_ASSUME_NONNULL_BEGIN - -/** - * @enum TFLTensorDataType - * This enum specifies supported TensorFlow Lite tensor data types. - */ -typedef NS_ENUM(NSUInteger, TFLTensorDataType) { - /** Tensor data type not available. This indicates an error with the model. */ - TFLTensorDataTypeNoType, - - /** 32-bit single precision floating point. */ - TFLTensorDataTypeFloat32, - - /** 32-bit signed integer. 
*/ - TFLTensorDataTypeInt32, - - /** 8-bit unsigned integer. */ - TFLTensorDataTypeUInt8, - - /** 64-bit signed integer. */ - TFLTensorDataTypeInt64, - - /** Boolean. */ - TFLTensorDataTypeBool, - - /** 16-bit signed integer. */ - TFLTensorDataTypeInt16, -}; - -/** - * An input or output tensor in a TensorFlow Lite model. - */ -@interface TFLTensor : NSObject - -/** Name of the tensor. */ -@property(nonatomic, readonly, copy) NSString *name; - -/** Data type of the tensor. */ -@property(nonatomic, readonly) TFLTensorDataType dataType; - -/** - * Shape of the tensor, an array of positive unsigned integer(s) containing the size of each - * dimension. For example: the shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is - * [2, 2, 3]. - */ -@property(nonatomic, readonly, copy) NSArray *shape; - -/** Number of bytes for the tensor data. */ -@property(nonatomic, readonly) NSUInteger byteSize; - -/** Parameters for asymmetric quantization. `nil` if the tensor does not use quantization. */ -@property(nonatomic, readonly, nullable) TFLQuantizationParameters *quantizationParameters; - -/** Unavailable. */ -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h deleted file mode 100644 index b6fd4763d6..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h" - -NS_ASSUME_NONNULL_BEGIN - -/** Helper utility for error reporting. */ -@interface TFLErrorUtil : NSObject - -/** - * Creates and returns an interpreter error with the given error code and description. - * - * @param code Error code. - * @param description Error description. - * - * @return The created interpreter error with the given error code and description. - */ -+ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code - description:(NSString *)description; - -/** - * Creates and saves an interpreter error with the given error code and description. - * - * @param code Error code. - * @param description Error description. - * @param error Pointer to where to save the created error. If `nil`, no error will be saved. - */ -+ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code - description:(NSString *)description - error:(NSError **)error; - -/** Unavailable. */ -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m deleted file mode 100644 index 756d69481c..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "TFLErrorUtil.h" - -NS_ASSUME_NONNULL_BEGIN - -/** Error domain of TensorFlow Lite interpreter related errors. */ -static NSString *const TFLInterpreterErrorDomain = @"org.tensorflow.lite.interpreter"; - -@implementation TFLErrorUtil - -#pragma mark - Public - -+ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code - description:(NSString *)description { - return [NSError errorWithDomain:TFLInterpreterErrorDomain - code:code - userInfo:@{NSLocalizedDescriptionKey : description}]; -} - -+ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code - description:(NSString *)description - error:(NSError **)error { - if (error) { - *error = [NSError errorWithDomain:TFLInterpreterErrorDomain - code:code - userInfo:@{NSLocalizedDescriptionKey : description}]; - } -} - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm deleted file mode 100644 index 0f940a5cf3..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm +++ /dev/null @@ -1,440 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h" - -#import "TFLErrorUtil.h" -#import "TFLTensor+Internal.h" -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h" -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h" - -#include "third_party/tensorflow/contrib/lite/experimental/c/c_api.h" - -NS_ASSUME_NONNULL_BEGIN - -/** - * @enum TFLTensorType - * This enum specifies input or output tensor types. - */ -typedef NS_ENUM(NSUInteger, TFLTensorType) { - /** Input tensor type. */ - TFLTensorTypeInput, - - /** Output tensor type. */ - TFLTensorTypeOutput, -}; - -// Names used for indicating input or output in error messages. -static NSString *const kTFLInputDirection = @"input"; -static NSString *const kTFLOutputDirection = @"output"; - -/** - * Error reporter for TFLInterpreter. - * - * @param user_data User data. Not used. - * @param format Error message which may contain argument formatting specifiers. - * @param args Values of the arguments in the error message. - */ -static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_list args) { - NSLog(@"%@", [[NSString alloc] initWithFormat:@(format) arguments:args]); -} - -@interface TFLInterpreter () - -/** TFL_Interpreter backed by C API. */ -@property(nonatomic, nullable) TFL_Interpreter *interpreter; - -/** - * An error in initializing the interpreter. If not `nil`, this error will be reported when the - * interpreter is used. 
- */ -@property(nonatomic, nullable) NSError *initializationError; - -@end - -@implementation TFLInterpreter - -#pragma mark - NSObject - -- (void)dealloc { - TFL_DeleteInterpreter(_interpreter); -} - -#pragma mark - Public - -- (instancetype)initWithModelPath:(NSString *)modelPath { - return [self initWithModelPath:modelPath options:[[TFLInterpreterOptions alloc] init]]; -} - -- (instancetype)initWithModelPath:(NSString *)modelPath options:(TFLInterpreterOptions *)options { - self = [super init]; - - if (self != nil) { - const char *modelPathCString = modelPath.UTF8String; - NSString *pathErrorString = - [NSString stringWithFormat:@"Cannot load model from path (%@).", modelPath]; - if (modelPathCString == nullptr) { - _initializationError = - [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel - description:pathErrorString]; - return self; - } - - TFL_Model *model = TFL_NewModelFromFile(modelPathCString); - if (model == nullptr) { - _initializationError = - [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel - description:pathErrorString]; - return self; - } - - TFL_InterpreterOptions *cOptions = TFL_NewInterpreterOptions(); - if (cOptions == nullptr) { - _initializationError = - [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter - description:@"Failed to create the interpreter."]; - TFL_DeleteModel(model); - return self; - } - - if (options.numberOfThreads > 0) { - TFL_InterpreterOptionsSetNumThreads(cOptions, (int32_t)options.numberOfThreads); - } - TFL_InterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr); - - _interpreter = TFL_NewInterpreter(model, cOptions); - if (_interpreter == nullptr) { - _initializationError = - [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter - description:@"Failed to create the interpreter."]; - } else { - _inputTensorCount = 
(NSUInteger)TFL_InterpreterGetInputTensorCount(_interpreter); - _outputTensorCount = (NSUInteger)TFL_InterpreterGetOutputTensorCount(_interpreter); - if (_inputTensorCount <= 0 || _outputTensorCount <= 0) { - _initializationError = - [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter - description:@"Failed to create the interpreter."]; - } - } - TFL_DeleteInterpreterOptions(cOptions); - TFL_DeleteModel(model); - } - - return self; -} - -- (BOOL)invokeWithError:(NSError **)error { - if (self.initializationError != nil) { - [self saveInitializationErrorToDestination:error]; - return NO; - } - - if (TFL_InterpreterInvoke(self.interpreter) != kTfLiteOk) { - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToInvoke - description:@"Failed to invoke the interpreter." - error:error]; - return NO; - } - - return YES; -} - -- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error { - if (self.initializationError != nil) { - [self saveInitializationErrorToDestination:error]; - return nil; - } - - if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) { - return nil; - } - - return [self tensorOfType:TFLTensorTypeInput atIndex:index error:error]; -} - -- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error { - if (self.initializationError != nil) { - [self saveInitializationErrorToDestination:error]; - return nil; - } - - if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) { - return nil; - } - - return [self tensorOfType:TFLTensorTypeOutput atIndex:index error:error]; -} - -- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index - toShape:(NSArray *)shape - error:(NSError **)error { - if (self.initializationError != nil) { - [self saveInitializationErrorToDestination:error]; - return NO; - } - - if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) { - return NO; - } - - if 
(shape.count == 0) { - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape - description:@"Invalid shape. Must not be empty." - error:error]; - return NO; - } - - int cDimensions[self.inputTensorCount]; - for (int d = 0; d < shape.count; ++d) { - int dimension = shape[d].intValue; - if (dimension <= 0) { - NSString *errorDescription = @"Invalid shape. Dimensions must be positive integers."; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape - description:errorDescription - error:error]; - return NO; - } - cDimensions[d] = dimension; - } - - if (TFL_InterpreterResizeInputTensor(self.interpreter, (int32_t)index, cDimensions, - (int32_t)shape.count) != kTfLiteOk) { - NSString *errorDescription = [NSString - stringWithFormat:@"Failed to resize input tensor at index (%lu).", (unsigned long)index]; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToResizeInputTensor - description:errorDescription - error:error]; - return NO; - } - - return YES; -} - -- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error { - if (self.initializationError != nil) { - [self saveInitializationErrorToDestination:error]; - return NO; - } - - if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) { - return NO; - } - - TFL_Tensor *tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index); - if (tensor == nullptr) { - NSString *errorDescription = [NSString - stringWithFormat:@"Failed to get input tensor at index (%lu).", (unsigned long)index]; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor - description:errorDescription - error:error]; - return NO; - } - - NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor); - if (data.length != byteSize) { - NSString *errorDescription = [NSString - stringWithFormat:@"Input tensor at index (%lu) expects data size (%lu), but got (%lu).", - 
(unsigned long)index, byteSize, (unsigned long)data.length]; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidInputByteSize - description:errorDescription - error:error]; - return NO; - } - - if (TFL_TensorCopyFromBuffer(tensor, data.bytes, data.length) != kTfLiteOk) { - NSString *errorDescription = - [NSString stringWithFormat:@"Failed to copy data into input tensor at index (%lu).", - (unsigned long)index]; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor - description:errorDescription - error:error]; - return NO; - } - - return YES; -} - -- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error { - if (self.initializationError != nil) { - [self saveInitializationErrorToDestination:error]; - return nil; - } - - if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) { - return nil; - } - - const TFL_Tensor *tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index); - if (tensor == nullptr) { - NSString *errorDescription = [NSString - stringWithFormat:@"Failed to get output tensor at index (%lu).", (unsigned long)index]; - [TFLErrorUtil - saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor - description:errorDescription - error:error]; - return nil; - } - - void *bytes = TFL_TensorData(tensor); - NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor); - if (bytes == nullptr || byteSize == 0) { - NSString *errorDescription = [NSString - stringWithFormat:@"Failed to get output tensor data at index (%lu).", (unsigned long)index]; - [TFLErrorUtil - saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor - description:errorDescription - error:error]; - return nil; - } - - return [NSData dataWithBytes:bytes length:byteSize]; -} - -- (BOOL)allocateTensorsWithError:(NSError **)error { - if (self.initializationError != nil) { - [self 
saveInitializationErrorToDestination:error]; - return NO; - } - - if (TFL_InterpreterAllocateTensors(self.interpreter) != kTfLiteOk) { - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToAllocateTensors - description:@"Failed to allocate memory for tensors." - error:error]; - return NO; - } - return YES; -} - -#pragma mark - Private - -- (nullable TFLTensor *)tensorOfType:(TFLTensorType)type - atIndex:(NSUInteger)index - error:(NSError **)error { - const TFL_Tensor *tensor = nullptr; - NSString *tensorType; - switch (type) { - case TFLTensorTypeInput: - tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index); - tensorType = kTFLInputDirection; - break; - case TFLTensorTypeOutput: - tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index); - tensorType = kTFLOutputDirection; - break; - } - - if (tensor == nullptr) { - NSString *errorDescription = - [NSString stringWithFormat:@"Failed to get %@ tensor at index (%lu).", tensorType, - (unsigned long)index]; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor - description:errorDescription - error:error]; - return nil; - } - - const char *cName = TFL_TensorName(tensor); - if (cName == nullptr) { - NSString *errorDescription = - [NSString stringWithFormat:@"Failed to get name of %@ tensor at index (%lu).", tensorType, - (unsigned long)index]; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor - description:errorDescription - error:error]; - return nil; - } - NSString *name = [NSString stringWithUTF8String:cName]; - - TFLTensorDataType dataType = [self tensorDataTypeFromCTensorType:TFL_TensorType(tensor)]; - - int32_t rank = TFL_TensorNumDims(tensor); - if (rank <= 0) { - NSString *errorDescription = - [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid rank (%d).", tensorType, - (unsigned long)index, rank]; - [TFLErrorUtil 
saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor - description:errorDescription - error:error]; - return nil; - } - NSMutableArray *shape = [NSMutableArray arrayWithCapacity:rank]; - for (int32_t d = 0; d < rank; d++) { - int32_t dimension = TFL_TensorDim(tensor, d); - if (dimension <= 0) { - NSString *errorDescription = - [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid %d-th dimension (%d).", - tensorType, (unsigned long)index, d, dimension]; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor - description:errorDescription - error:error]; - return nil; - } - shape[d] = @((NSUInteger)dimension); - } - - // TODO: Set quantization parameters when C API supports it. - return [[TFLTensor alloc] initWithName:name - dataType:dataType - shape:shape - byteSize:(NSUInteger)TFL_TensorByteSize(tensor) - quantizationParameters:nil]; -} - -- (TFLTensorDataType)tensorDataTypeFromCTensorType:(TFL_Type)cTensorType { - switch (cTensorType) { - case kTfLiteFloat32: - return TFLTensorDataTypeFloat32; - case kTfLiteInt32: - return TFLTensorDataTypeInt32; - case kTfLiteUInt8: - return TFLTensorDataTypeUInt8; - case kTfLiteInt64: - return TFLTensorDataTypeInt64; - case kTfLiteBool: - return TFLTensorDataTypeBool; - case kTfLiteInt16: - return TFLTensorDataTypeInt16; - case kTfLiteNoType: - case kTfLiteString: - case kTfLiteComplex64: - // kTfLiteString and kTfLiteComplex64 are not supported in TensorFlow Lite Objc API. 
- return TFLTensorDataTypeNoType; - } -} - -- (void)saveInitializationErrorToDestination:(NSError **)destination { - if (destination != NULL) { - *destination = self.initializationError; - } -} - -- (BOOL)isValidTensorIndex:(NSUInteger)index - belowLimit:(NSUInteger)totalTensorCount - error:(NSError **)error { - if (index >= totalTensorCount) { - NSString *errorDescription = - [NSString stringWithFormat:@"Invalid tensor index (%lu) exceeds max (%lu).", - (unsigned long)index, (unsigned long)(totalTensorCount - 1)]; - [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensorIndex - description:errorDescription - error:error]; - return NO; - } - - return YES; -} - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m deleted file mode 100644 index 1776688288..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h" - -NS_ASSUME_NONNULL_BEGIN - -@implementation TFLInterpreterOptions - -#pragma mark - Public - -- (instancetype)init { - self = [super init]; - return self; -} - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m deleted file mode 100644 index 190f0479ce..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h" - -NS_ASSUME_NONNULL_BEGIN - -@implementation TFLQuantizationParameters - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h deleted file mode 100644 index f2f13e5e5f..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h" - -NS_ASSUME_NONNULL_BEGIN - -@interface TFLTensor (Internal) - -/** - * Initializes a `TFLTensor` with the given name, data type, shape, and quantization parameters. - * - * @param name Name of the tensor. - * @param dataType Data type of the tensor. - * @param shape Shape of the tensor. - * @param byteSize Size of the tensor data in number of bytes. - * @param quantizationParameters Quantization parameters of the tensor. `nil` if the tensor does not - * use quantization. - * - * @return A new instance of `TFLTensor` with the given name, data type, shape, and quantization - * parameters. - */ -- (instancetype)initWithName:(NSString *)name - dataType:(TFLTensorDataType)dataType - shape:(NSArray *)shape - byteSize:(NSUInteger)byteSize - quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters; - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m deleted file mode 100644 index adb1c5ad2c..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h" - -#import "TFLTensor+Internal.h" - -NS_ASSUME_NONNULL_BEGIN - -@interface TFLTensor () - -// Redefines readonly properties. -@property(nonatomic, copy) NSString *name; -@property(nonatomic) TFLTensorDataType dataType; -@property(nonatomic, copy) NSArray *shape; -@property(nonatomic) NSUInteger byteSize; -@property(nonatomic, nullable) TFLQuantizationParameters *quantizationParameters; - -@end - -@implementation TFLTensor - -#pragma mark - TFLTensor (Internal) - -- (instancetype)initWithName:(NSString *)name - dataType:(TFLTensorDataType)dataType - shape:(NSArray *)shape - byteSize:(NSUInteger)byteSize - quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters { - self = [super init]; - if (self != nil) { - _name = [name copy]; - _dataType = dataType; - _shape = [shape copy]; - _byteSize = byteSize; - _quantizationParameters = quantizationParameters; - } - return self; -} - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m deleted file mode 100644 index 17c495fa18..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h" - -#import - -NS_ASSUME_NONNULL_BEGIN - -/** - * Unit tests for TFLInterpreterOptions. - */ -@interface TFLInterpreterOptionsTests : XCTestCase -@end - -@implementation TFLInterpreterOptionsTests - -#pragma mark - Tests - -- (void)testInit { - TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; - XCTAssertNotNil(options); - XCTAssertEqual(options.numberOfThreads, 0); -} - -- (void)testSetNumberOfThread { - TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; - options.numberOfThreads = 2; - XCTAssertEqual(options.numberOfThreads, 2); - options.numberOfThreads = 0; - XCTAssertEqual(options.numberOfThreads, 0); - options.numberOfThreads = 3; - XCTAssertEqual(options.numberOfThreads, 3); -} - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m deleted file mode 100644 index 9e6319a732..0000000000 --- a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h" - -#import - -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h" -#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h" - -NS_ASSUME_NONNULL_BEGIN - -/** Model resource name. */ -static NSString *const kAddModelResourceName = @"add"; - -/** Model resource type. */ -static NSString *const kAddModelResourceType = @"bin"; - -/** Rank of the input and output tensor in the Add model. */ -static const NSUInteger kAddModelTensorRank = 1U; - -/** Size of the first (and only) dimension of the input and output tensor in the Add model. */ -static const NSUInteger kAddModelTensorFirstDimensionSize = 2U; - -/** Invalid input tensor index. */ -static const NSUInteger kInvalidInputTensorIndex = 1U; - -/** Invalid output tensor index. */ -static const NSUInteger kInvalidOutputTensorIndex = 1U; - -/** Accurary used in comparing floating numbers. */ -static const float kTestAccuracy = 1E-5F; - -/** - * Unit tests for TFLInterpreter. - */ -@interface TFLInterpreterTests : XCTestCase - -/** Absolute path of the Add model resource. */ -@property(nonatomic, nullable) NSString *modelPath; - -/** Default interpreter using the Add model. 
*/ -@property(nonatomic, nullable) TFLInterpreter *interpreter; - -@end - -@implementation TFLInterpreterTests - -#pragma mark - XCTestCase - -- (void)setUp { - [super setUp]; - - NSBundle *bundle = [NSBundle bundleForClass:[self class]]; - self.modelPath = [bundle pathForResource:kAddModelResourceName ofType:kAddModelResourceType]; - self.interpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath]; - XCTAssertNotNil(self.interpreter); - XCTAssertTrue([self.interpreter allocateTensorsWithError:nil]); -} - -- (void)tearDown { - self.modelPath = nil; - self.interpreter = nil; - - [super tearDown]; -} - -#pragma mark - Tests - -- (void)testSuccessfulFullRun { - // Shape for both input and output tensor. - NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; - shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; - - // Creates the interpreter options. - TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; - XCTAssertNotNil(options); - options.numberOfThreads = 2; - - // Creates the interpreter. - TFLInterpreter *customInterpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath - options:options]; - XCTAssertNotNil(customInterpreter); - - // Allocates memory for tensors. - NSError *error; - XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]); - XCTAssertNil(error); - - // Verifies input and output tensor counts. - XCTAssertEqual(customInterpreter.inputTensorCount, 1); - XCTAssertEqual(customInterpreter.outputTensorCount, 1); - - // Resizes the intput tensor. - XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); - XCTAssertNil(error); - - // Re-allocates memory for tensors. - XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]); - XCTAssertNil(error); - - // Verifies the input tensor. 
- TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error]; - XCTAssertNotNil(inputTensor); - XCTAssertNil(error); - XCTAssertTrue([inputTensor.name isEqualToString:@"input"]); - XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeFloat32); - XCTAssertTrue([shape isEqualToArray:inputTensor.shape]); - XCTAssertEqual(inputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize); - - // Copies the input data. - NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; - float one = 1.f; - float three = 3.f; - [inputData appendBytes:&one length:sizeof(float)]; - [inputData appendBytes:&three length:sizeof(float)]; - XCTAssertTrue([customInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]); - XCTAssertNil(error); - - // Invokes the interpreter. - XCTAssertTrue([customInterpreter invokeWithError:&error]); - XCTAssertNil(error); - - // Verifies the output tensor. - TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error]; - XCTAssertNotNil(outputTensor); - XCTAssertNil(error); - XCTAssertTrue([outputTensor.name isEqualToString:@"output"]); - XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeFloat32); - XCTAssertTrue([shape isEqualToArray:outputTensor.shape]); - XCTAssertEqual(outputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize); - - // Tries to query an invalid output tensor index. - TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex - error:&error]; - XCTAssertNil(invalidOutputTensor); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); - - // Gets the output tensor data. 
- error = nil; - NSData *outputData = [customInterpreter dataFromOutputTensorAtIndex:0 error:&error]; - XCTAssertNotNil(outputData); - XCTAssertNil(error); - float output[kAddModelTensorFirstDimensionSize]; - [outputData getBytes:output length:(sizeof(float) * kAddModelTensorFirstDimensionSize)]; - XCTAssertEqualWithAccuracy(output[0], 3.f, kTestAccuracy); - XCTAssertEqualWithAccuracy(output[1], 9.f, kTestAccuracy); -} - -- (void)testInitWithModelPath_invalidPath { - // Shape for both input and output tensor. - NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; - shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; - - // Creates the interpreter. - TFLInterpreter *brokenInterpreter = [[TFLInterpreter alloc] initWithModelPath:@"InvalidPath"]; - XCTAssertNotNil(brokenInterpreter); - XCTAssertEqual(brokenInterpreter.inputTensorCount, 0); - XCTAssertEqual(brokenInterpreter.outputTensorCount, 0); - - // Allocates memory for tensors. - NSError *error; - XCTAssertFalse([brokenInterpreter allocateTensorsWithError:&error]); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); - - // Resizes the intput tensor. - XCTAssertFalse([brokenInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); - - // Verifies the input tensor. - TFLTensor *inputTensor = [brokenInterpreter inputTensorAtIndex:0 error:&error]; - XCTAssertNil(inputTensor); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); - - // Copies the input data. 
- NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; - float one = 1.f; - float three = 3.f; - [inputData appendBytes:&one length:sizeof(float)]; - [inputData appendBytes:&three length:sizeof(float)]; - XCTAssertFalse([brokenInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); - - // Invokes the interpreter. - XCTAssertFalse([brokenInterpreter invokeWithError:&error]); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); - - // Verifies the output tensor. - TFLTensor *outputTensor = [brokenInterpreter outputTensorAtIndex:0 error:&error]; - XCTAssertNil(outputTensor); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); - - // Gets the output tensor data. - NSData *outputData = [brokenInterpreter dataFromOutputTensorAtIndex:0 error:&error]; - XCTAssertNil(outputData); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); -} - -- (void)testInvoke_beforeAllocation { - TFLInterpreter *interpreterWithoutAllocation = - [[TFLInterpreter alloc] initWithModelPath:self.modelPath]; - XCTAssertNotNil(interpreterWithoutAllocation); - - NSError *error; - XCTAssertFalse([interpreterWithoutAllocation invokeWithError:&error]); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToInvoke); -} - -- (void)testInputTensorAtIndex_invalidIndex { - NSError *error; - TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:kInvalidInputTensorIndex - error:&error]; - XCTAssertNil(inputTensor); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); -} - -- (void)testResizeInputTensorAtIndex_invalidIndex { - NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; - shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; - NSError *error; - XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:kInvalidInputTensorIndex - toShape:shape - error:&error]); - 
XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); -} - -- (void)testResizeInputTensorAtIndex_emptyShape { - NSMutableArray *emptyShape = [NSMutableArray arrayWithCapacity:0]; - NSError *error; - XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:emptyShape error:&error]); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape); -} - -- (void)testResizeInputTensorAtIndex_zeroDimensionSize { - NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; - shape[0] = [NSNumber numberWithUnsignedInteger:0]; - NSError *error; - XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape); -} - -- (void)testCopyDataToInputTensorAtIndex_invalidInputDataByteSize { - NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; - float one = 1.f; - float three = 3.f; - [inputData appendBytes:&one length:sizeof(float)]; - [inputData appendBytes:&three length:(sizeof(float) - 1)]; - NSError *error; - XCTAssertFalse([self.interpreter copyData:inputData toInputTensorAtIndex:0 error:&error]); - XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidInputByteSize); -} - -@end - -NS_ASSUME_NONNULL_END diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index 31b68c8f00..c6ef82ccdc 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -105,7 +105,6 @@ BLACKLIST = [ "//tensorflow/contrib/timeseries/python/timeseries:test_utils", "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils", # pylint:disable=line-too-long "//tensorflow/contrib/image:sparse_image_warp_test_data", - "//tools/build_defs/apple:ios.bzl", ] -- GitLab From 58fcfc98cd59ae3952399fc55380b8733df08df9 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Tue, 9 Oct 2018 19:41:35 -0700 Subject: [PATCH 177/411] 
[XLA] Add documentation and HLO-level support for multi-value sort. No support in any of the backends, and not yet exposed through XlaBuilder. PiperOrigin-RevId: 216465753 --- .../xla/service/algebraic_simplifier.cc | 2 +- .../xla/service/algebraic_simplifier_test.cc | 12 ++++++--- .../service/bfloat16_normalization_test.cc | 2 +- .../xla/service/hlo_dataflow_analysis_test.cc | 3 ++- .../compiler/xla/service/hlo_instruction.cc | 17 ++++++------ .../compiler/xla/service/hlo_instruction.h | 4 +-- .../compiler/xla/service/hlo_instructions.cc | 9 +++---- .../compiler/xla/service/hlo_instructions.h | 2 +- tensorflow/compiler/xla/service/hlo_parser.cc | 20 +++----------- .../compiler/xla/service/hlo_parser_test.cc | 15 +++++++++++ .../compiler/xla/service/hlo_verifier.cc | 22 +++++++++------- .../compiler/xla/service/shape_inference.cc | 25 +++++++++++------- .../xla/service/shape_inference_test.cc | 26 ++++++++++++++++++- .../service/tuple_points_to_analysis_test.cc | 3 ++- tensorflow/compiler/xla/tests/test_utils.cc | 6 +++-- 15 files changed, 104 insertions(+), 64 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 86d9dbea90..ca71f2cc12 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2209,7 +2209,7 @@ Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) { } // If it is key/value sort, the output of sort is a tuple. 
return ReplaceWithNewInstruction( - sort, HloInstruction::CreateTuple({operand, sort->mutable_operand(1)})); + sort, HloInstruction::CreateTuple(sort->operands())); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 2047f894b4..42d1f337dc 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2133,16 +2133,20 @@ TEST_F(AlgebraicSimplifierTest, ReplaceEffectiveScalarKeyValueSortWithTuple) { Shape values_shape = ShapeUtil::MakeShape(S32, {5, 0}); auto keys = builder.AddInstruction( HloInstruction::CreateParameter(0, keys_shape, "keys")); - auto values = builder.AddInstruction( - HloInstruction::CreateParameter(1, values_shape, "values")); + auto values0 = builder.AddInstruction( + HloInstruction::CreateParameter(1, values_shape, "values0")); + auto values1 = builder.AddInstruction( + HloInstruction::CreateParameter(2, values_shape, "values1")); builder.AddInstruction(HloInstruction::CreateSort( - ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values)); + ShapeUtil::MakeTupleShape({keys_shape, values_shape, values_shape}), 0, + keys, {values0, values1})); auto module = CreateNewModule(); HloComputation* computation = module->AddEntryComputation(builder.Build()); AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); - EXPECT_THAT(computation->root_instruction(), op::Tuple(keys, values)); + EXPECT_THAT(computation->root_instruction(), + op::Tuple(keys, values0, values1)); } // Used for TEST_Ps that test merging (or not) of a kPad instruction into a diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc index cef0eba14e..2411fdcb20 100644 --- 
a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -284,7 +284,7 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSort) { HloInstruction::CreateParameter(1, s32_shape, "value")); HloInstruction* sort = builder.AddInstruction(HloInstruction::CreateSort( - ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), 0, key, value)); + ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), 0, key, {value})); HloInstruction* gte = builder.AddInstruction( HloInstruction::CreateGetTupleElement(bf16_shape, sort, 0)); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index d27786d160..909853106d 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -2346,7 +2346,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) { auto values = builder.AddInstruction( HloInstruction::CreateParameter(1, values_shape, "values")); auto sort = builder.AddInstruction(HloInstruction::CreateSort( - ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values)); + ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, + {values})); BuildModuleAndRunAnalysis(builder.Build()); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 09bcf8a9e7..c317e9e3b4 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -195,17 +195,16 @@ StatusOr> HloInstruction::CreateFromProto( } break; case HloOpcode::kSort: { - TF_RET_CHECK(proto.operand_ids_size() == 1 || - proto.operand_ids_size() == 2) - << "Sort instruction should have 1 or 2 operands but has " + TF_RET_CHECK(proto.operand_ids_size() >= 1) + << "Sort instruction should have at least 1 operand but has " << 
proto.operand_ids_size(); TF_RET_CHECK(proto.dimensions().size() == 1) << "Sort instruction should have 1 dimension"; - HloInstruction* keys = operands(0); - HloInstruction* values = - proto.operand_ids_size() == 2 ? operands(1) : nullptr; - instruction = - CreateSort(proto.shape(), proto.dimensions(0), keys, values); + auto sort_operands = all_operands(); + HloInstruction* keys = sort_operands[0]; + instruction = CreateSort( + proto.shape(), proto.dimensions(0), keys, + absl::Span(sort_operands).subspan(1)); break; } case HloOpcode::kTranspose: @@ -1078,7 +1077,7 @@ HloInstruction::CreateBroadcastSequence( /* static */ std::unique_ptr HloInstruction::CreateSort( const Shape& shape, int64 dimension, HloInstruction* keys, - HloInstruction* values) { + absl::Span values) { return absl::make_unique(shape, dimension, keys, values); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 44f776ebac..93ff04b1e4 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -670,10 +670,10 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, absl::Span dimensions); - // Creates a sort op, with a keys operand, and an optional values operand. + // Creates a sort op, with a keys operand, and optional values operands. static std::unique_ptr CreateSort( const Shape& shape, int64 dimension, HloInstruction* keys, - HloInstruction* values = nullptr); + absl::Span values = {}); // Creates a while instruction, given a condition computation, a body // computation, and the initial value for the input of the computations. 
For diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 2ec233eaec..179ace2cdb 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -600,11 +600,11 @@ std::unique_ptr HloReduceInstruction::CloneWithNewOperandsImpl( HloSortInstruction::HloSortInstruction(const Shape& shape, int64 dimension, HloInstruction* keys, - HloInstruction* values) + absl::Span values) : HloInstruction(HloOpcode::kSort, shape), dimensions_({dimension}) { AppendOperand(keys); - if (values) { - AppendOperand(values); + for (auto* value : values) { + AppendOperand(value); } } @@ -633,9 +633,8 @@ std::unique_ptr HloSortInstruction::CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const { HloInstruction* keys = new_operands[0]; - HloInstruction* values = new_operands.size() == 2 ? new_operands[1] : nullptr; return absl::make_unique(shape, dimensions(0), keys, - values); + new_operands.subspan(1)); } HloTransposeInstruction::HloTransposeInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 4c5fc759a3..3a0b7490dc 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -418,7 +418,7 @@ class HloSortInstruction : public HloInstruction { public: explicit HloSortInstruction(const Shape& shape, int64 dimension, HloInstruction* keys, - HloInstruction* values = nullptr); + absl::Span values = {}); // Returns the dimension sizes or numbers associated with this instruction. 
const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 96f9ff6654..128113f7a5 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -839,8 +839,6 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, break; } case HloOpcode::kSort: { - auto loc = lexer_.GetLoc(); - optional> dimensions; attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, &dimensions}; @@ -848,20 +846,10 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, dimensions->size() != 1) { return false; } - switch (operands.size()) { - case 1: - instruction = builder->AddInstruction(HloInstruction::CreateSort( - shape, dimensions->at(0), /*keys=*/operands[0])); - break; - case 2: - instruction = builder->AddInstruction(HloInstruction::CreateSort( - shape, dimensions->at(0), - /*keys=*/operands[0], /*values=*/operands[1])); - break; - default: - return Error(loc, StrCat("expects either 1 or 2 operands, but has ", - operands.size(), " operands")); - } + instruction = builder->AddInstruction(HloInstruction::CreateSort( + shape, dimensions->at(0), + /*keys=*/operands[0], + /*values=*/absl::Span(operands).subspan(1))); break; } case HloOpcode::kTuple: { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 17538c05bc..ef2e74588c 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1003,6 +1003,21 @@ ENTRY Sort { ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}) sort(keys, values), dimensions={0} } +)" +}, +// Sort (Key, Value, Value, Value) +{ +"SortManyValues", +R"(HloModule sort + +ENTRY Sort { + keys = f32[1024,16]{0,1} parameter(0) + values.0 = 
s32[1024,16]{0,1} parameter(1) + values.1 = u32[1024,16]{0,1} parameter(2) + values.2 = f32[1024,16]{0,1} parameter(3) + ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}, u32[1024,16]{0,1}, f32[1024,16]{0,1}) sort(keys, values.0, values.1, values.2), dimensions={0} +} + )" }, // Conditional diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 620458855f..a1f668921d 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -266,18 +266,20 @@ Status ShapeVerifier::HandleReverse(HloInstruction* reverse) { } Status ShapeVerifier::HandleSort(HloInstruction* sort) { - if (sort->operand_count() < 1 || sort->operand_count() > 2) { - return InternalError("Expected 1 or 2 operands for %s instruction: %s", + if (sort->operand_count() < 1) { + return InternalError("Expected at least 1 operand for %s instruction: %s", HloOpcodeString(sort->opcode()), sort->ToString()); } - if (sort->operand_count() == 2 && - !ShapeUtil::SameDimensions(sort->operand(0)->shape(), - sort->operand(1)->shape())) { - return InternalError( - "Expected sort to have to have the same dimensions for the keys and " - "the values. Keys shape is: %s\n, Values shape is: %s", - StringifyShape(sort->operand(0)->shape()), - StringifyShape(sort->operand(1)->shape())); + for (int64 operand = 1; operand < sort->operand_count(); ++operand) { + if (!ShapeUtil::SameDimensions(sort->operand(0)->shape(), + sort->operand(operand)->shape())) { + return InternalError( + "Expected sort to have to have the same dimensions for the keys " + "and the values. 
Keys shape is: %s\n, Values shape (operand index " + "%lld) is: %s", + StringifyShape(sort->operand(0)->shape()), operand, + StringifyShape(sort->operand(operand)->shape())); + } } return CheckVariadicShape(sort); } diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index e379911462..aa49f98bcf 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1029,17 +1029,22 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, case HloOpcode::kSort: { if (operand_shapes.size() == 1) { return *operand_shapes[0]; - } else if (operand_shapes.size() == 2) { - if (!ShapeUtil::SameDimensions(*operand_shapes[0], - *operand_shapes[1])) { - return InvalidArgument( - "Sort keys and values dimensions must match. " - "Keys shape is: %s\n, Values shape is: %s", - ShapeUtil::HumanString(*operand_shapes[0]), - ShapeUtil::HumanString(*operand_shapes[1])); + } else { + for (int64 operand = 1; operand < operand_shapes.size(); ++operand) { + if (!ShapeUtil::SameDimensions(*operand_shapes[0], + *operand_shapes[operand])) { + return InvalidArgument( + "Sort keys and values dimensions must match. 
" + "Keys shape is: %s\n, Values shape (operand index %lld) is: %s", + ShapeUtil::HumanString(*operand_shapes[0]), operand, + ShapeUtil::HumanString(*operand_shapes[operand])); + } + } + std::vector operand_shape_values; + for (const Shape* operand_shape : operand_shapes) { + operand_shape_values.push_back(*operand_shape); } - return ShapeUtil::MakeTupleShape( - {*operand_shapes[0], *operand_shapes[1]}); + return ShapeUtil::MakeTupleShape(operand_shape_values); } return InvalidArgument("Unexpected number of operands for sort"); } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 864ed43118..7b65e8c1c9 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1618,13 +1618,37 @@ TEST_F(ShapeInferenceTest, BadSort) { auto values = ShapeUtil::MakeShape(F32, {5}); StatusOr statusor = ShapeInference::InferVariadicOpShape(HloOpcode::kSort, {&keys, &values}); - ASSERT_FALSE(statusor.ok()); + EXPECT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("dimensions must match")) + << statusor.status(); +} +TEST_F(ShapeInferenceTest, BadSortValuesMismatch) { + auto keys = ShapeUtil::MakeShape(F32, {4}); + auto values_good = ShapeUtil::MakeShape(F32, {4}); + auto values_bad = ShapeUtil::MakeShape(F32, {5}); + StatusOr statusor = ShapeInference::InferVariadicOpShape( + HloOpcode::kSort, {&keys, &values_good, &values_bad}); + EXPECT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), HasSubstr("dimensions must match")) << statusor.status(); } +TEST_F(ShapeInferenceTest, SortManyValues) { + auto keys = ShapeUtil::MakeShape(F32, {4}); + auto values_s32 = ShapeUtil::MakeShape(S32, {4}); + auto values_u32 = ShapeUtil::MakeShape(U32, {4}); + StatusOr statusor = ShapeInference::InferVariadicOpShape( + HloOpcode::kSort, {&keys, &values_s32, &values_u32}); + EXPECT_IS_OK(statusor); + 
Shape inferred_shape = statusor.ValueOrDie(); + EXPECT_TRUE(ShapeUtil::Compatible( + inferred_shape, + ShapeUtil::MakeTupleShape({keys, values_s32, values_u32}))); +} + class ScatterGatherShapeInferenceTest : public ShapeInferenceTest { protected: const Shape s64_scalar_ = ShapeUtil::MakeShape(S64, {}); diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index a571bd571b..d9ebebf74e 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -1073,7 +1073,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) { auto values = builder.AddInstruction( HloInstruction::CreateParameter(1, values_shape, "values")); auto sort = builder.AddInstruction(HloInstruction::CreateSort( - ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values)); + ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, + {values})); BuildModuleAndRunAnalysis(builder.Build()); diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc index 5155f0c652..2f18036ff4 100644 --- a/tensorflow/compiler/xla/tests/test_utils.cc +++ b/tensorflow/compiler/xla/tests/test_utils.cc @@ -272,9 +272,11 @@ std::vector FindConstrainedUses( constrained_uses.insert(constrained_uses.end(), converted_uses.begin(), converted_uses.end()); } else if (opcode == HloOpcode::kSort && - instruction->operand_count() == 2 && op_num == 0) { + instruction->operand_count() >= 2 && op_num == 0) { // Operand 0 of sort is the array of keys used for key/value - // (two-operand) kSort instructions. + // (two-operand) kSort instructions. Since sort stability is not + // guaranteed, constrain keys of key-value sort not to have duplicates, + // since otherwise the value order may legitimately differ. 
constrained_uses.push_back(instruction); } } -- GitLab From 854ae599743a1e92a31ad49cfe42c6454cefd3b9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 20:05:22 -0700 Subject: [PATCH 178/411] Use Ophints to support TfLite UnidirectionaSequenceLstm and add an e2e test. Support peephole and num_proj as well. PiperOrigin-RevId: 216467578 --- .../lite/experimental/examples/lstm/BUILD | 40 ++ .../experimental/examples/lstm/tflite_lstm.py | 396 ++++++++++++++++++ .../lstm/unidirectional_sequence_lstm_test.py | 226 ++++++++++ .../propagate_array_data_types.cc | 6 + .../propagate_fixed_sizes.cc | 47 +++ .../contrib/lite/toco/import_tensorflow.cc | 44 ++ tensorflow/contrib/lite/toco/model.h | 6 + .../contrib/lite/toco/tflite/operator.cc | 39 ++ tensorflow/contrib/lite/toco/tooling_util.cc | 5 +- .../tools/pip_package/pip_smoke_test.py | 4 + 10 files changed, 811 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/BUILD create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/BUILD b/tensorflow/contrib/lite/experimental/examples/lstm/BUILD new file mode 100644 index 0000000000..2125f218ca --- /dev/null +++ b/tensorflow/contrib/lite/experimental/examples/lstm/BUILD @@ -0,0 +1,40 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//tensorflow:internal"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +py_library( + name = "tflite_lstm", + srcs = ["tflite_lstm.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/lite/python:lite", + "//tensorflow/python:framework", + "@six_archive//:six", + ], +) + +py_test( + name = "unidirectional_sequence_lstm_test", + size = "large", + srcs = 
["unidirectional_sequence_lstm_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_oss", + "no_pip", + ], + deps = [ + ":tflite_lstm", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/lite/python:lite", + "//tensorflow/examples/tutorials/mnist:input_data", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform", + "//tensorflow/python/tools:optimize_for_inference", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py b/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py new file mode 100644 index 0000000000..2357743266 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py @@ -0,0 +1,396 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TfLite LSTMCell wrapper. + +TODO(renjieliu): Find a better home for this one. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tensorflow as tf + +from tensorflow.contrib.lite.python import lite +from tensorflow.python.keras import activations +from tensorflow.python.keras import initializers +from tensorflow.python.layers import base as base_layer +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.platform import tf_logging as logging + + +class TFLiteLSTMCell(rnn_cell_impl.LayerRNNCell): + """Long short-term memory unit (LSTM) recurrent network cell. + + This is used only for TfLite, it provides hints and it also makes the + variables in the desired for the tflite ops (transposed and seaparated). + + The default non-peephole implementation is based on: + + https://pdfs.semanticscholar.org/1154/0131eae85b2e11d53df7f1360eeb6476e7f4.pdf + + Felix Gers, Jurgen Schmidhuber, and Fred Cummins. + "Learning to forget: Continual prediction with LSTM." IET, 850-855, 1999. + + The peephole implementation is based on: + + https://research.google.com/pubs/archive/43905.pdf + + Hasim Sak, Andrew Senior, and Francoise Beaufays. + "Long short-term memory recurrent neural network architectures for + large scale acoustic modeling." INTERSPEECH, 2014. + + The class uses optional peep-hole connections, optional cell clipping, and + an optional projection layer. + + Note that this cell is not optimized for performance. Please use + `tf.contrib.cudnn_rnn.CudnnLSTM` for better performance on GPU, or + `tf.contrib.rnn.LSTMBlockCell` and `tf.contrib.rnn.LSTMBlockFusedCell` for + better performance on CPU. 
+ """ + + def __init__(self, + num_units, + use_peepholes=False, + cell_clip=None, + initializer=None, + num_proj=None, + proj_clip=None, + num_unit_shards=None, + num_proj_shards=None, + forget_bias=1.0, + state_is_tuple=True, + activation=None, + reuse=None, + name=None, + dtype=None): + """Initialize the parameters for an LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell. + use_peepholes: bool, set True to enable diagonal/peephole connections. + cell_clip: (optional) A float value, if provided the cell state is clipped + by this value prior to the cell output activation. + initializer: (optional) The initializer to use for the weight and + projection matrices. + num_proj: (optional) int, The output dimensionality for the projection + matrices. If None, no projection is performed. + proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is + provided, then the projected values are clipped elementwise to within + `[-proj_clip, proj_clip]`. + num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a + variable_scope partitioner instead. + num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a + variable_scope partitioner instead. + forget_bias: Biases of the forget gate are initialized by default to 1 in + order to reduce the scale of forgetting at the beginning of the + training. Must set it manually to `0.0` when restoring from CudnnLSTM + trained checkpoints. + state_is_tuple: If True, accepted and returned states are 2-tuples of the + `c_state` and `m_state`. If False, they are concatenated along the + column axis. This latter behavior will soon be deprecated. + activation: Activation function of the inner states. Default: `tanh`. + reuse: (optional) Python boolean describing whether to reuse variables in + an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + name: String, the name of the layer. 
Layers with the same name will share + weights, but to avoid mistakes we require reuse=True in such cases. + dtype: Default dtype of the layer (default of `None` means use the type of + the first input). Required when `build` is called before `call`. When + restoring from CudnnLSTM-trained checkpoints, use + `CudnnCompatibleLSTMCell` instead. + """ + super(TFLiteLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype) + # TODO(raziel): decide if we want to just support tuples (yes please!). + if not state_is_tuple: + logging.warn( + "%s: Using a concatenated state is slower and will soon be " + "deprecated. Use state_is_tuple=True.", self) + if num_unit_shards is not None or num_proj_shards is not None: + logging.warn( + "%s: The num_unit_shards and proj_unit_shards parameters are " + "deprecated and will be removed in Jan 2017. " + "Use a variable scope with a partitioner instead.", self) + + # Inputs must be 2-dimensional. + # TODO(raziel): layers stuff -- chop if un-layerizing Op. + self.input_spec = base_layer.InputSpec(ndim=2) + + self._tflite_wrapper = lite.OpHint("UnidirectionalSequenceLstm") + + self._num_units = num_units + self._use_peepholes = use_peepholes + self._cell_clip = cell_clip + self._initializer = initializer + self._num_proj = num_proj + self._proj_clip = proj_clip + self._num_unit_shards = num_unit_shards + self._num_proj_shards = num_proj_shards + self._forget_bias = forget_bias + self._state_is_tuple = state_is_tuple + self._activation = activation or math_ops.tanh + + self._output_size = num_proj if num_proj else num_units + self._state_size = ( + tf.nn.rnn_cell.LSTMStateTuple(num_units, self._output_size) + if state_is_tuple else num_units + self._output_size) + + @property + def state_size(self): + return self._state_size + + @property + def output_size(self): + return self._output_size + + def build(self, inputs_shape): + """Build TfLite LSTM cell graph. 
+ + Args: + inputs_shape: The inputs_shape must be known, and is [batch_size, + input_size] shape. + + Raises: + ValueError: if the inputs_shape is invalid. + """ + if len(inputs_shape) != 2 or inputs_shape[1].value is None: + raise ValueError("Invalid inputs_shape, saw shape: %s" % inputs_shape) + + input_depth = inputs_shape[1].value + maybe_partitioner = ( + partitioned_variables.fixed_size_partitioner(self._num_unit_shards) + if self._num_unit_shards is not None else None) + input_weight_shape = [self._num_units, input_depth] + cell_weight_shape = [self._num_units, self._output_size] + bias_shape = [self._num_units] + + def add_variable_wrapped(name, shape, initializer, index, partitioner): + var = self.add_variable( + name, shape=shape, initializer=initializer, partitioner=partitioner) + return self._tflite_wrapper.add_input( + var, name="name", index_override=index) + + weight_initializer = self._initializer + if self.dtype is None: + bias_initializer = init_ops.zeros_initializer + else: + bias_initializer = init_ops.zeros_initializer(dtype=self.dtype) + + self.input_to_input_w = add_variable_wrapped( + "input_to_input_w", input_weight_shape, weight_initializer, 1, + maybe_partitioner) + self.input_to_forget_w = add_variable_wrapped( + "input_to_forget_w", input_weight_shape, weight_initializer, 2, + maybe_partitioner) + self.input_to_cell_w = add_variable_wrapped( + "input_to_cell_w", input_weight_shape, weight_initializer, 3, + maybe_partitioner) + self.input_to_output_w = add_variable_wrapped( + "input_to_output_w", input_weight_shape, weight_initializer, 4, + maybe_partitioner) + self.cell_to_input_w = add_variable_wrapped( + "cell_to_input_w", cell_weight_shape, weight_initializer, 5, + maybe_partitioner) + self.cell_to_forget_w = add_variable_wrapped( + "cell_to_forget_w", cell_weight_shape, weight_initializer, 6, + maybe_partitioner) + self.cell_to_cell_w = add_variable_wrapped( + "cell_to_cell_w", cell_weight_shape, weight_initializer, 7, + 
maybe_partitioner) + self.cell_to_output_w = add_variable_wrapped( + "cell_to_output_w", cell_weight_shape, weight_initializer, 8, + maybe_partitioner) + + self.input_bias = add_variable_wrapped( + "input_bias", bias_shape, bias_initializer, 12, maybe_partitioner) + self.forget_bias = add_variable_wrapped( + "forget_bias", bias_shape, bias_initializer, 13, maybe_partitioner) + self.cell_bias = add_variable_wrapped( + "cell_bias", bias_shape, bias_initializer, 14, maybe_partitioner) + self.output_bias = add_variable_wrapped( + "output_bias", bias_shape, bias_initializer, 15, maybe_partitioner) + + # index 9, 10, 11. + # f stands for forget, i stands for input and o stands for output. + if self._use_peepholes: + self._w_f_diag = add_variable_wrapped("w_f_diag", [self._num_units], + self._initializer, 9, + maybe_partitioner) + self._w_i_diag = add_variable_wrapped("w_i_diag", [self._num_units], + self._initializer, 10, + maybe_partitioner) + self._w_o_diag = add_variable_wrapped("w_o_diag", [self._num_units], + self._initializer, 11, + maybe_partitioner) + + # index 16 for proj kernel. + if self._num_proj is not None: + maybe_proj_partitioner = ( + partitioned_variables.fixed_size_partitioner(self._num_proj_shards) + if self._num_proj_shards is not None else None) + self._proj_kernel = add_variable_wrapped( + "projection/kernel", [self._num_proj, self._num_units], + self._initializer, + 16, + partitioner=maybe_proj_partitioner) + + self.built = True + + def call(self, inputs, state): + """Run one step of LSTM. + + Args: + inputs: input Tensor, 2D, `[batch, num_units]`. + state: if `state_is_tuple` is False, this must be a state Tensor, `2-D, + [batch, state_size]`. If `state_is_tuple` is True, this must be a tuple + of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`. + + Returns: + A tuple containing: + + - A `2-D, [batch, output_dim]`, Tensor representing the output of the + LSTM after reading `inputs` when previous state was `state`. 
+ Here output_dim is: + num_proj if num_proj was set, + num_units otherwise. + - Tensor(s) representing the new state of LSTM after reading `inputs` when + the previous state was `state`. Same type and shape(s) as `state`. + + Raises: + ValueError: If input size cannot be inferred from inputs via + static shape inference. + """ + inputs = self._tflite_wrapper.add_input( + inputs, tag="input", name="input", aggregate="stack", index_override=0) + + # Make sure inputs and bias_initializer has the same type. + assert inputs.dtype == self.input_to_input_w.dtype + + num_proj = self._num_units if self._num_proj is None else self._num_proj + sigmoid = math_ops.sigmoid + + if self._state_is_tuple: + (c_prev, m_prev) = state + else: + c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units]) + m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj]) + + # Note: For TfLite, cell_state is at index 19 while activation state at + # index 18. + c_prev = self._tflite_wrapper.add_input( + c_prev, + tag="c_prev", + name="c_prev", + aggregate="first", + index_override=19) + m_prev = self._tflite_wrapper.add_input( + m_prev, + tag="m_prev", + name="m_prev", + aggregate="first", + index_override=18) + + input_size = inputs.get_shape().with_rank(2)[1] + if input_size.value is None: + raise ValueError("Could not infer input size from inputs.get_shape()[-1]") + + inputs_and_m_prev = array_ops.concat([inputs, m_prev], axis=1) + + # i stands for input gate. + # f stands for forget gate activation. + # o outputs. + # j output of LSTM unit. + # c is the final state. + # m is the output. 
+ i = nn_ops.bias_add( + tf.matmul( + inputs_and_m_prev, + tf.concat([self.input_to_input_w, self.cell_to_input_w], axis=1), + transpose_b=True), self.input_bias) + f = nn_ops.bias_add( + tf.matmul( + inputs_and_m_prev, + tf.concat([self.input_to_forget_w, self.cell_to_forget_w], axis=1), + transpose_b=True), self.forget_bias) + o = nn_ops.bias_add( + tf.matmul( + inputs_and_m_prev, + tf.concat([self.input_to_output_w, self.cell_to_output_w], axis=1), + transpose_b=True), self.output_bias) + j = nn_ops.bias_add( + tf.matmul( + inputs_and_m_prev, + tf.concat([self.input_to_cell_w, self.cell_to_cell_w], axis=1), + transpose_b=True), self.cell_bias) + + # Diagonal connections + if self._use_peepholes: + c = ( + sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev + + sigmoid(i + self._w_i_diag * c_prev) * self._activation(j)) + else: + c = ( + sigmoid(f + self._forget_bias) * c_prev + + sigmoid(i) * self._activation(j)) + + if self._cell_clip is not None: + # pylint: disable=invalid-unary-operand-type + c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) + # pylint: enable=invalid-unary-operand-type + if self._use_peepholes: + m = sigmoid(o + self._w_o_diag * c) * self._activation(c) + else: + m = sigmoid(o) * self._activation(c) + + if self._num_proj is not None: + transposed_proj_kernel = tf.transpose(self._proj_kernel) + m = math_ops.matmul(m, transposed_proj_kernel) + + if self._proj_clip is not None: + # pylint: disable=invalid-unary-operand-type + m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip) + # pylint: enable=invalid-unary-operand-type + + c = self._tflite_wrapper.add_output( + c, tag="c", name="c", aggregate="last", index_override=1) + m = self._tflite_wrapper.add_output( + m, tag="m", name="m", index_override=2, aggregate="stack") + + new_state = ( + tf.nn.rnn_cell.LSTMStateTuple(c, m) + if self._state_is_tuple else array_ops.concat([c, m], 1)) + return m, new_state + + def get_config(self): + config = { + 
"num_units": self._num_units, + "use_peepholes": self._use_peepholes, + "cell_clip": self._cell_clip, + "initializer": initializers.serialize(self._initializer), + "num_proj": self._num_proj, + "proj_clip": self._proj_clip, + "num_unit_shards": self._num_unit_shards, + "num_proj_shards": self._num_proj_shards, + "forget_bias": self._forget_bias, + "state_is_tuple": self._state_is_tuple, + "activation": activations.serialize(self._activation), + "reuse": self._reuse, + } + base_config = super(TFLiteLSTMCell, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py b/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py new file mode 100644 index 0000000000..2ca977518c --- /dev/null +++ b/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py @@ -0,0 +1,226 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tempfile +import numpy as np +import tensorflow as tf + +from tensorflow.contrib.lite.experimental.examples.lstm.tflite_lstm import TFLiteLSTMCell +from tensorflow.examples.tutorials.mnist import input_data +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test +from tensorflow.python.tools import optimize_for_inference_lib + +# Number of steps to train model. +TRAIN_STEPS = 1 + +CONFIG = tf.ConfigProto(device_count={"GPU": 0}) + + +class UnidirectionalSequenceLstmTest(test_util.TensorFlowTestCase): + + def setUp(self): + tf.reset_default_graph() + # Import MNIST dataset + self.mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) + + # Define constants + # Unrolled through 28 time steps + self.time_steps = 28 + # Rows of 28 pixels + self.n_input = 28 + # Learning rate for Adam optimizer + self.learning_rate = 0.001 + # MNIST is meant to be classified in 10 classes(0-9). + self.n_classes = 10 + # Batch size + self.batch_size = 16 + # Lstm Units. + self.num_units = 64 + + def buildLstmLayer(self): + return tf.nn.rnn_cell.MultiRNNCell([ + TFLiteLSTMCell( + self.num_units, use_peepholes=True, forget_bias=0, name="rnn1"), + TFLiteLSTMCell(self.num_units, num_proj=64, forget_bias=0, name="rnn2"), + TFLiteLSTMCell( + self.num_units // 2, + use_peepholes=True, + num_proj=64, + forget_bias=0, + name="rnn3"), + TFLiteLSTMCell(self.num_units, forget_bias=0, name="rnn4") + ]) + + def buildModel(self, lstm_layer, is_dynamic_rnn, is_train): + # Weights and biases for output softmax layer. 
+ out_weights = tf.Variable( + tf.random_normal([self.num_units, self.n_classes])) + out_bias = tf.Variable(tf.random_normal([self.n_classes])) + + # input image placeholder + x = tf.placeholder( + "float", [None, self.time_steps, self.n_input], name="INPUT_IMAGE") + + # For dynamic_rnn, train with dynamic_rnn and inference with static_rnn. + # x is shaped [batch_size,time_steps,num_inputs] + if is_dynamic_rnn: + if is_train: + lstm_input = x + outputs, _ = tf.nn.dynamic_rnn(lstm_layer, lstm_input, dtype="float32") + outputs = tf.unstack(outputs, axis=1) + else: + lstm_input = tf.unstack(x, self.time_steps, 1) + outputs, _ = tf.nn.static_rnn(lstm_layer, lstm_input, dtype="float32") + else: + lstm_input = tf.unstack(x, self.time_steps, 1) + outputs, _ = tf.nn.static_rnn(lstm_layer, lstm_input, dtype="float32") + + # Compute logits by multiplying outputs[-1] of shape [batch_size,num_units] + # by the softmax layer's out_weight of shape [num_units,n_classes] + # plus out_bias + prediction = tf.matmul(outputs[-1], out_weights) + out_bias + output_class = tf.nn.softmax(prediction, name="OUTPUT_CLASS") + + return x, prediction, output_class + + def trainModel(self, x, prediction, output_class, sess): + # input label placeholder + y = tf.placeholder("float", [None, self.n_classes]) + # Loss function + loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) + # Optimization + opt = tf.train.AdamOptimizer( + learning_rate=self.learning_rate).minimize(loss) + + # Initialize variables + init = tf.global_variables_initializer() + sess.run(init) + for _ in range(TRAIN_STEPS): + batch_x, batch_y = self.mnist.train.next_batch( + batch_size=self.batch_size, shuffle=False) + + batch_x = batch_x.reshape((self.batch_size, self.time_steps, + self.n_input)) + sess.run(opt, feed_dict={x: batch_x, y: batch_y}) + + def saveAndRestoreModel(self, lstm_layer, sess, saver, is_dynamic_rnn): + model_dir = tempfile.mkdtemp() + saver.save(sess, model_dir) + 
+ # Reset the graph. + tf.reset_default_graph() + x, prediction, output_class = self.buildModel( + lstm_layer, is_dynamic_rnn, is_train=False) + + new_sess = tf.Session(config=CONFIG) + saver = tf.train.Saver() + saver.restore(new_sess, model_dir) + return x, prediction, output_class, new_sess + + def getInferenceResult(self, x, output_class, sess): + b1, _ = self.mnist.train.next_batch(batch_size=1) + sample_input = np.reshape(b1, (1, self.time_steps, self.n_input)) + + expected_output = sess.run(output_class, feed_dict={x: sample_input}) + frozen_graph = tf.graph_util.convert_variables_to_constants( + sess, sess.graph_def, [output_class.op.name]) + return sample_input, expected_output, frozen_graph + + def tfliteInvoke(self, graph, test_inputs, outputs): + tf.reset_default_graph() + # Turn the input into placeholder of shape 1 + tflite_input = tf.placeholder( + "float", [1, self.time_steps, self.n_input], name="INPUT_IMAGE_LITE") + tf.import_graph_def(graph, name="", input_map={"INPUT_IMAGE": tflite_input}) + with tf.Session() as sess: + curr = sess.graph_def + curr = tf.contrib.lite.convert_op_hints_to_stubs(graph_def=curr) + + curr = optimize_for_inference_lib.optimize_for_inference( + curr, ["INPUT_IMAGE_LITE"], ["OUTPUT_CLASS"], + [tf.float32.as_datatype_enum]) + + tflite = tf.contrib.lite.toco_convert( + curr, [tflite_input], [outputs], allow_custom_ops=False) + interpreter = tf.contrib.lite.Interpreter(model_content=tflite) + + try: + interpreter.allocate_tensors() + except ValueError: + assert False + + input_index = (interpreter.get_input_details()[0]["index"]) + interpreter.set_tensor(input_index, test_inputs) + interpreter.invoke() + output_index = (interpreter.get_output_details()[0]["index"]) + result = interpreter.get_tensor(output_index) + # Reset all variables so it will not pollute other inferences. 
+ interpreter.reset_all_variables() + return result + + def testStaticRnnMultiRnnCell(self): + sess = tf.Session(config=CONFIG) + + x, prediction, output_class = self.buildModel( + self.buildLstmLayer(), is_dynamic_rnn=False, is_train=True) + self.trainModel(x, prediction, output_class, sess) + + saver = tf.train.Saver() + x, prediction, output_class, new_sess = self.saveAndRestoreModel( + self.buildLstmLayer(), sess, saver, is_dynamic_rnn=False) + + test_inputs, expected_output, frozen_graph = self.getInferenceResult( + x, output_class, new_sess) + + result = self.tfliteInvoke(frozen_graph, test_inputs, output_class) + self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-3)) + + def testDynamicRnnMultiRnnCell(self): + sess = tf.Session(config=CONFIG) + + x, prediction, output_class = self.buildModel( + self.buildLstmLayer(), is_dynamic_rnn=True, is_train=True) + self.trainModel(x, prediction, output_class, sess) + + # Since we don't yet support OpHints for dynamic, we will load the model + # back in as a static model. This requires the variables to have the same + # names as if they were trained as a static. Thus, we get rid of while/rnn + # names. 
+ variables_to_save = {} + for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): + op_name = i.name + if op_name.startswith("while/rnn/"): + op_name = op_name.split("while/rnn/")[1] + if op_name.endswith(":0"): + op_name = op_name.split(":0")[0] + variables_to_save[op_name] = i + saver = tf.train.Saver(variables_to_save) + + x, prediction, output_class, new_sess = self.saveAndRestoreModel( + self.buildLstmLayer(), sess, saver, is_dynamic_rnn=True) + + test_inputs, expected_output, frozen_graph = self.getInferenceResult( + x, output_class, new_sess) + + result = self.tfliteInvoke(frozen_graph, test_inputs, output_class) + self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-3)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc index 40cd6dea82..47faa20a29 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc @@ -239,6 +239,12 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op, } break; } + case OperatorType::kUnidirectionalSequenceLstm: { + const ArrayDataType data_type = model->GetArray(op->inputs[0]).data_type; + if (data_type != ArrayDataType::kFloat) return ::tensorflow::Status::OK(); + SetDataTypeForAllOutputs(model, op, data_type); + break; + } default: { // These operators produce outputs with the same type as their 1st input CHECK_GT(op->inputs.size(), 0); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 5496e2093e..e861df2b3d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -946,6 
+946,49 @@ void ProcessLstmCellOperator(Model* model, LstmCellOperator* op) { .copy_shape(activ_temp_shape); } +void ProcessUnidirectionalSequenceLstmOperator( + Model* model, UnidirectionalSequenceLstmOperator* op) { + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.has_shape()) { + // Shape already propagated + return; + } + + if (output_array.data_type == ArrayDataType::kNone) { + // Yield until the output type has been set by PropagateArrayDataTypes + return; + } + + // TODO(renjieliu): check the inputs, as well as all kinds of weights. + const auto& input_array = model->GetArray(op->inputs[0]); + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + const int batch_size = input_shape.dims(1); + const int timestamp = input_shape.dims(0); + + const auto& recurrent_to_output_weights_array = + model->GetArray(op->inputs[8]); + // Yield until input dims have been resolved. + if (!recurrent_to_output_weights_array.has_shape()) { + return; + } + + constexpr int kInputActivationStateTensor = 18; + constexpr int kInputCellStateTensor = 19; + // b(115961645): This is a hack to work around. + model->GetArray(op->inputs[kInputActivationStateTensor]).buffer.reset(); + model->GetArray(op->inputs[kInputCellStateTensor]).buffer.reset(); + + const auto& output_weights_shape = recurrent_to_output_weights_array.shape(); + const int output_size = output_weights_shape.dims(1); + + Shape* output_shape = output_array.mutable_shape(); + output_shape->ReplaceDims({timestamp, batch_size, output_size}); +} + void ProcessSpaceToBatchNDOperator(Model* model, SpaceToBatchNDOperator* op) { const auto& input_array = model->GetArray(op->inputs[0]); // Yield until input dims have been resolved. 
@@ -1800,6 +1843,10 @@ void ProcessUnpackOperator(Model* model, UnpackOperator* op) { ProcessResizeBilinearOperator(model, static_cast(op)); break; + case OperatorType::kUnidirectionalSequenceLstm: + ProcessUnidirectionalSequenceLstmOperator( + model, static_cast(op)); + break; case OperatorType::kLstmCell: ProcessLstmCellOperator(model, static_cast(op)); break; diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 32f22e1ea0..6b195cc992 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -43,6 +43,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/public/session_options.h" @@ -2002,6 +2003,48 @@ tensorflow::Status ConvertCTCBeamSearchDecoderOperator( return tensorflow::Status::OK(); } +// This isn't a TensorFlow builtin op. Currently this node can only be generated +// with TfLite OpHint API. +tensorflow::Status ConvertUnidirectionalSequenceLstm( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { + DCHECK_EQ(node.op(), "UnidirectionalSequenceLstm"); + + auto* op = new UnidirectionalSequenceLstmOperator(); + const auto& indices = GetListAttr(node, "_tflite_input_indices"); + if (indices.i_size() != node.input().size()) { + return tensorflow::errors::InvalidArgument("Input size does not match."); + } + + // The input size needs to be the same as the TfLite UniDirectionalSequence + // Lstm implementation. 
+ const int kInputsSize = 20; + + op->inputs.resize(kInputsSize); + std::vector done(kInputsSize); + int idx = 0; + for (const string& input : node.input()) { + int real_index = indices.i(idx); + op->inputs[real_index] = (input); + done[real_index] = true; + idx++; + } + + for (int idx = 0; idx < done.size(); idx++) { + if (!done[idx]) { + string optional_name = node.name() + "_" + std::to_string(idx); + model->CreateOptionalArray(optional_name); + op->inputs[idx] = optional_name; + } + } + + // There're three outputs, only the last one is required. + op->outputs.push_back(node.name() + ":2"); + model->operators.emplace_back(op); + + return tensorflow::Status::OK(); +} + } // namespace namespace internal { @@ -2121,6 +2164,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() { {"Transpose", ConvertSimpleOperator}, {"Unpack", ConvertUnpackOperator}, {"ZerosLike", ConvertSimpleOperator}, + {"UnidirectionalSequenceLstm", ConvertUnidirectionalSequenceLstm}, }); } diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 61f1f095e9..f3b84430db 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -58,6 +58,7 @@ enum class OperatorType : uint8 { kL2Normalization, kL2Pool, kLstmCell, + kUnidirectionalSequenceLstm, kLocalResponseNormalization, kLog, kLogistic, @@ -635,6 +636,11 @@ struct LstmCellOperator : Operator { KernelType kernel_type; }; +struct UnidirectionalSequenceLstmOperator : Operator { + UnidirectionalSequenceLstmOperator() + : Operator(OperatorType::kUnidirectionalSequenceLstm) {} +}; + // Element-wise multiplication operator. 
// // Inputs: diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index ed37535fe0..e08a61d357 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -741,6 +741,42 @@ class Lstm : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + // Current toco converter only supports tanh, no clip. + return ::tflite::CreateUnidirectionalSequenceLSTMOptions( + *builder, /*fused_activation_function=*/ + ::tflite::ActivationFunctionType_TANH, + /*cell_clip=*/0.0, + /*proj_clip=*/0.0); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + // Only support tanh activation, so check that tflite type is tanh. + DCHECK(options.fused_activation_function() == + ::tflite::ActivationFunctionType_TANH); + } + + int GetVersion(const Operator& op) const override { return 1; } + + std::vector GetMutatingInputVariables( + const Operator& op) const override { + std::vector mutating_input_variables(op.inputs.size(), false); + mutating_input_variables[kInputActivationStateTensor] = true; + mutating_input_variables[kInputCellStateTensor] = true; + return mutating_input_variables; + } +}; + class Mean : public BuiltinOperator { public: @@ -1435,6 +1471,9 @@ std::vector> BuildOperatorList( OperatorType::kFakeQuant)); ops.push_back( MakeUnique(::tflite::BuiltinOperator_PACK, OperatorType::kPack)); + ops.emplace_back(MakeUnique( + ::tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + OperatorType::kUnidirectionalSequenceLstm)); ops.push_back(MakeUnique(::tflite::BuiltinOperator_ONE_HOT, OperatorType::kOneHot)); ops.push_back(MakeUnique(::tflite::BuiltinOperator_UNPACK, diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 
083a96ad9d..61aa311212 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -407,6 +407,7 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(CTCBeamSearchDecoder) HANDLE_OPERATORTYPENAME_CASE(Unpack) HANDLE_OPERATORTYPENAME_CASE(ZerosLike) + HANDLE_OPERATORTYPENAME_CASE(UnidirectionalSequenceLstm) default: LOG(FATAL) << "Unhandled op type"; #undef HANDLE_OPERATORTYPENAME_CASE @@ -898,12 +899,12 @@ void CheckNoMissingArray(const Model& model) { void FixNoMissingArray(Model* model) { for (const auto& op : model->operators) { for (const auto& input : op->inputs) { - if (!model->HasArray(input)) { + if (!model->HasArray(input) && !model->IsOptionalArray(input)) { model->GetOrCreateArray(input); } } for (const auto& output : op->outputs) { - if (!model->HasArray(output)) { + if (!model->HasArray(output) && !model->IsOptionalArray(output)) { model->GetOrCreateArray(output); } } diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index c6ef82ccdc..45106b35fc 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -85,6 +85,10 @@ BLACKLIST = [ # contrib "//tensorflow/contrib/session_bundle:session_bundle_half_plus_two", "//tensorflow/contrib/keras:testing_utils", + "//tensorflow/contrib/lite/experimental/examples/lstm:tflite_lstm", + "//tensorflow/contrib/lite/experimental/examples/lstm:tflite_lstm.py", + "//tensorflow/contrib/lite/experimental/examples/lstm:unidirectional_sequence_lstm_test", # pylint:disable=line-too-long + "//tensorflow/contrib/lite/experimental/examples/lstm:unidirectional_sequence_lstm_test.py", # pylint:disable=line-too-long "//tensorflow/contrib/lite/python:interpreter", "//tensorflow/contrib/lite/python:interpreter_test", "//tensorflow/contrib/lite/python:interpreter.py", -- GitLab From 5d670479c6ea20c510fa46ae1bb45123df75e067 Mon Sep 17 
00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 20:51:50 -0700 Subject: [PATCH 179/411] Add a more verbose error message. PiperOrigin-RevId: 216471178 --- tensorflow/contrib/lite/kernels/embedding_lookup.cc | 10 ++++++++-- .../contrib/lite/kernels/embedding_lookup_sparse.cc | 4 +++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup.cc b/tensorflow/contrib/lite/kernels/embedding_lookup.cc index fe33f98eb0..1d0c71ad48 100644 --- a/tensorflow/contrib/lite/kernels/embedding_lookup.cc +++ b/tensorflow/contrib/lite/kernels/embedding_lookup.cc @@ -78,7 +78,10 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, for (int i = 0; i < SizeOfDimension(lookup, 0); i++) { int idx = lookup->data.i32[i]; if (idx >= row_size || idx < 0) { - context->ReportError(context, "Embedding Lookup: index out of bounds."); + context->ReportError(context, + "Embedding Lookup: index out of bounds. " + "Got %d, and bounds are [0, %d]", + idx, row_size - 1); return kTfLiteError; } else { memcpy(output->data.raw + i * row_bytes, @@ -104,7 +107,10 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, for (int i = 0; i < SizeOfDimension(lookup, 0); i++) { int idx = lookup->data.i32[i]; if (idx >= row_size || idx < 0) { - context->ReportError(context, "Embedding Lookup: index out of bounds."); + context->ReportError(context, + "Embedding Lookup: index out of bounds. " + "Got %d, and bounds are [0, %d]", + idx, row_size - 1); return kTfLiteError; } else { // Dequantize embedding values. 
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc index aa75b03990..0b076941ea 100644 --- a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc +++ b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc @@ -188,7 +188,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { int idx = ids->data.i32[i]; if (idx >= num_rows || idx < 0) { context->ReportError(context, - "Embedding Lookup Sparse: index out of bounds."); + "Embedding Lookup Sparse: index out of bounds. " + "Got %d, and bounds are [0, %d]", + idx, num_rows - 1); return kTfLiteError; } -- GitLab From 513de7eaeffe5deb1d1a8c42d24028045f8046e5 Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Wed, 10 Oct 2018 13:55:21 +0900 Subject: [PATCH 180/411] fixed documentation formatting --- .../core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt index 3c8a455983..9c4015eaa4 100644 --- a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt @@ -42,8 +42,5 @@ We specify the size-related attributes as: ``` END } - summary: < Date: Tue, 9 Oct 2018 21:54:32 -0700 Subject: [PATCH 181/411] Fix lstm_test&layer_norm_lstm_test w/ Clang 8.0.0 PiperOrigin-RevId: 216475683 --- .../lite/kernels/layer_norm_lstm_test.cc | 116 +++++++++--------- tensorflow/contrib/lite/kernels/lstm_test.cc | 92 +++++++------- 2 files changed, 102 insertions(+), 106 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc index 479f6a7d3c..1535f750f9 100644 --- a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc +++ 
b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc @@ -129,87 +129,85 @@ class LayerNormLSTMOpModel : public SingleOpModel { BuildInterpreter(input_shapes); } - void SetInputToInputWeights(std::initializer_list f) { + void SetInputToInputWeights(std::vector f) { PopulateTensor(input_to_input_weights_, f); } - void SetInputToForgetWeights(std::initializer_list f) { + void SetInputToForgetWeights(std::vector f) { PopulateTensor(input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { + void SetInputToCellWeights(std::vector f) { PopulateTensor(input_to_cell_weights_, f); } - void SetInputToOutputWeights(std::initializer_list f) { + void SetInputToOutputWeights(std::vector f) { PopulateTensor(input_to_output_weights_, f); } - void SetRecurrentToInputWeights(std::initializer_list f) { + void SetRecurrentToInputWeights(std::vector f) { PopulateTensor(recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { + void SetRecurrentToForgetWeights(std::vector f) { PopulateTensor(recurrent_to_forget_weights_, f); } - void SetRecurrentToCellWeights(std::initializer_list f) { + void SetRecurrentToCellWeights(std::vector f) { PopulateTensor(recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { + void SetRecurrentToOutputWeights(std::vector f) { PopulateTensor(recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { + void SetCellToInputWeights(std::vector f) { PopulateTensor(cell_to_input_weights_, f); } - void SetCellToForgetWeights(std::initializer_list f) { + void SetCellToForgetWeights(std::vector f) { PopulateTensor(cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { + void SetCellToOutputWeights(std::vector f) { PopulateTensor(cell_to_output_weights_, f); } - void SetInputLayerNormWeights(std::initializer_list f) { + void SetInputLayerNormWeights(std::vector f) { 
PopulateTensor(input_layer_norm_weights_, f); } - void SetForgetLayerNormWeights(std::initializer_list f) { + void SetForgetLayerNormWeights(std::vector f) { PopulateTensor(forget_layer_norm_weights_, f); } - void SetCellLayerNormWeights(std::initializer_list f) { + void SetCellLayerNormWeights(std::vector f) { PopulateTensor(cell_layer_norm_weights_, f); } - void SetOutputLayerNormWeights(std::initializer_list f) { + void SetOutputLayerNormWeights(std::vector f) { PopulateTensor(output_layer_norm_weights_, f); } - void SetInputGateBias(std::initializer_list f) { + void SetInputGateBias(std::vector f) { PopulateTensor(input_gate_bias_, f); } - void SetForgetGateBias(std::initializer_list f) { + void SetForgetGateBias(std::vector f) { PopulateTensor(forget_gate_bias_, f); } - void SetCellBias(std::initializer_list f) { - PopulateTensor(cell_bias_, f); - } + void SetCellBias(std::vector f) { PopulateTensor(cell_bias_, f); } - void SetOutputGateBias(std::initializer_list f) { + void SetOutputGateBias(std::vector f) { PopulateTensor(output_gate_bias_, f); } - void SetProjectionWeights(std::initializer_list f) { + void SetProjectionWeights(std::vector f) { PopulateTensor(projection_weights_, f); } - void SetProjectionBias(std::initializer_list f) { + void SetProjectionBias(std::vector f) { PopulateTensor(projection_bias_, f); } @@ -278,67 +276,67 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel { use_projection_bias, cell_clip, proj_clip, input_shapes, TensorType_UINT8) {} - void SetInputToInputWeights(std::initializer_list f) { + void SetInputToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_input_weights_, f); } - void SetInputToForgetWeights(std::initializer_list f) { + void SetInputToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { + void SetInputToCellWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_cell_weights_, f); 
} - void SetInputToOutputWeights(std::initializer_list f) { + void SetInputToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_output_weights_, f); } - void SetRecurrentToInputWeights(std::initializer_list f) { + void SetRecurrentToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { + void SetRecurrentToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f); } - void SetRecurrentToCellWeights(std::initializer_list f) { + void SetRecurrentToCellWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { + void SetRecurrentToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { + void SetCellToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_input_weights_, f); } - void SetCellToForgetWeights(std::initializer_list f) { + void SetCellToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { + void SetCellToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_output_weights_, f); } - void SetInputLayerNormWeights(std::initializer_list f) { + void SetInputLayerNormWeights(std::vector f) { PopulateTensor(input_layer_norm_weights_, f); } - void SetForgetLayerNormWeights(std::initializer_list f) { + void SetForgetLayerNormWeights(std::vector f) { PopulateTensor(forget_layer_norm_weights_, f); } - void SetCellLayerNormWeights(std::initializer_list f) { + void SetCellLayerNormWeights(std::vector f) { PopulateTensor(cell_layer_norm_weights_, f); } - void SetOutputLayerNormWeights(std::initializer_list f) { + void SetOutputLayerNormWeights(std::vector f) { PopulateTensor(output_layer_norm_weights_, f); } - void 
SetProjectionWeights(std::initializer_list f) { + void SetProjectionWeights(std::vector f) { SymmetricQuantizeAndPopulate(projection_weights_, f); } }; @@ -346,26 +344,26 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel { class BaseLayerNormLstmTest : public ::testing::Test { protected: // Weights of the Layer Norm LSTM model. Some are optional. - std::initializer_list input_to_input_weights_; - std::initializer_list input_to_cell_weights_; - std::initializer_list input_to_forget_weights_; - std::initializer_list input_to_output_weights_; - std::initializer_list input_gate_bias_; - std::initializer_list cell_gate_bias_; - std::initializer_list forget_gate_bias_; - std::initializer_list output_gate_bias_; - std::initializer_list recurrent_to_input_weights_; - std::initializer_list recurrent_to_cell_weights_; - std::initializer_list recurrent_to_forget_weights_; - std::initializer_list recurrent_to_output_weights_; - std::initializer_list cell_to_input_weights_; - std::initializer_list cell_to_forget_weights_; - std::initializer_list cell_to_output_weights_; - std::initializer_list input_layer_norm_weights_; - std::initializer_list forget_layer_norm_weights_; - std::initializer_list cell_layer_norm_weights_; - std::initializer_list output_layer_norm_weights_; - std::initializer_list projection_weights_; + std::vector input_to_input_weights_; + std::vector input_to_cell_weights_; + std::vector input_to_forget_weights_; + std::vector input_to_output_weights_; + std::vector input_gate_bias_; + std::vector cell_gate_bias_; + std::vector forget_gate_bias_; + std::vector output_gate_bias_; + std::vector recurrent_to_input_weights_; + std::vector recurrent_to_cell_weights_; + std::vector recurrent_to_forget_weights_; + std::vector recurrent_to_output_weights_; + std::vector cell_to_input_weights_; + std::vector cell_to_forget_weights_; + std::vector cell_to_output_weights_; + std::vector input_layer_norm_weights_; + std::vector forget_layer_norm_weights_; + 
std::vector cell_layer_norm_weights_; + std::vector output_layer_norm_weights_; + std::vector projection_weights_; // Layer Norm LSTM input is stored as num_batch x num_inputs vector. std::vector> layer_norm_lstm_input_; diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc index e7ddfceb45..f8947db724 100644 --- a/tensorflow/contrib/lite/kernels/lstm_test.cc +++ b/tensorflow/contrib/lite/kernels/lstm_test.cc @@ -116,71 +116,69 @@ class LSTMOpModel : public SingleOpModel { BuildInterpreter(input_shapes); } - void SetInputToInputWeights(std::initializer_list f) { + void SetInputToInputWeights(std::vector f) { PopulateTensor(input_to_input_weights_, f); } - void SetInputToForgetWeights(std::initializer_list f) { + void SetInputToForgetWeights(std::vector f) { PopulateTensor(input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { + void SetInputToCellWeights(std::vector f) { PopulateTensor(input_to_cell_weights_, f); } - void SetInputToOutputWeights(std::initializer_list f) { + void SetInputToOutputWeights(std::vector f) { PopulateTensor(input_to_output_weights_, f); } - void SetRecurrentToInputWeights(std::initializer_list f) { + void SetRecurrentToInputWeights(std::vector f) { PopulateTensor(recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { + void SetRecurrentToForgetWeights(std::vector f) { PopulateTensor(recurrent_to_forget_weights_, f); } - void SetRecurrentToCellWeights(std::initializer_list f) { + void SetRecurrentToCellWeights(std::vector f) { PopulateTensor(recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { + void SetRecurrentToOutputWeights(std::vector f) { PopulateTensor(recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { + void SetCellToInputWeights(std::vector f) { PopulateTensor(cell_to_input_weights_, f); } - void 
SetCellToForgetWeights(std::initializer_list f) { + void SetCellToForgetWeights(std::vector f) { PopulateTensor(cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { + void SetCellToOutputWeights(std::vector f) { PopulateTensor(cell_to_output_weights_, f); } - void SetInputGateBias(std::initializer_list f) { + void SetInputGateBias(std::vector f) { PopulateTensor(input_gate_bias_, f); } - void SetForgetGateBias(std::initializer_list f) { + void SetForgetGateBias(std::vector f) { PopulateTensor(forget_gate_bias_, f); } - void SetCellBias(std::initializer_list f) { - PopulateTensor(cell_bias_, f); - } + void SetCellBias(std::vector f) { PopulateTensor(cell_bias_, f); } - void SetOutputGateBias(std::initializer_list f) { + void SetOutputGateBias(std::vector f) { PopulateTensor(output_gate_bias_, f); } - void SetProjectionWeights(std::initializer_list f) { + void SetProjectionWeights(std::vector f) { PopulateTensor(projection_weights_, f); } - void SetProjectionBias(std::initializer_list f) { + void SetProjectionBias(std::vector f) { PopulateTensor(projection_bias_, f); } @@ -243,51 +241,51 @@ class HybridLSTMOpModel : public LSTMOpModel { use_projection_weights, use_projection_bias, cell_clip, proj_clip, input_shapes, TensorType_UINT8) {} - void SetInputToInputWeights(std::initializer_list f) { + void SetInputToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_input_weights_, f); } - void SetInputToForgetWeights(std::initializer_list f) { + void SetInputToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { + void SetInputToCellWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_cell_weights_, f); } - void SetInputToOutputWeights(std::initializer_list f) { + void SetInputToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_output_weights_, f); } - void 
SetRecurrentToInputWeights(std::initializer_list f) { + void SetRecurrentToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { + void SetRecurrentToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f); } - void SetRecurrentToCellWeights(std::initializer_list f) { + void SetRecurrentToCellWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { + void SetRecurrentToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { + void SetCellToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_input_weights_, f); } - void SetCellToForgetWeights(std::initializer_list f) { + void SetCellToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { + void SetCellToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_output_weights_, f); } - void SetProjectionWeights(std::initializer_list f) { + void SetProjectionWeights(std::vector f) { SymmetricQuantizeAndPopulate(projection_weights_, f); } }; @@ -295,22 +293,22 @@ class HybridLSTMOpModel : public LSTMOpModel { class BaseLstmTest : public ::testing::Test { protected: // Weights of the LSTM model. Some are optional. 
- std::initializer_list input_to_input_weights_; - std::initializer_list input_to_cell_weights_; - std::initializer_list input_to_forget_weights_; - std::initializer_list input_to_output_weights_; - std::initializer_list input_gate_bias_; - std::initializer_list cell_gate_bias_; - std::initializer_list forget_gate_bias_; - std::initializer_list output_gate_bias_; - std::initializer_list recurrent_to_input_weights_; - std::initializer_list recurrent_to_cell_weights_; - std::initializer_list recurrent_to_forget_weights_; - std::initializer_list recurrent_to_output_weights_; - std::initializer_list cell_to_input_weights_; - std::initializer_list cell_to_forget_weights_; - std::initializer_list cell_to_output_weights_; - std::initializer_list projection_weights_; + std::vector input_to_input_weights_; + std::vector input_to_cell_weights_; + std::vector input_to_forget_weights_; + std::vector input_to_output_weights_; + std::vector input_gate_bias_; + std::vector cell_gate_bias_; + std::vector forget_gate_bias_; + std::vector output_gate_bias_; + std::vector recurrent_to_input_weights_; + std::vector recurrent_to_cell_weights_; + std::vector recurrent_to_forget_weights_; + std::vector recurrent_to_output_weights_; + std::vector cell_to_input_weights_; + std::vector cell_to_forget_weights_; + std::vector cell_to_output_weights_; + std::vector projection_weights_; // LSTM input is stored as num_batch x num_inputs vector. std::vector> lstm_input_; -- GitLab From dcf641daac0f2fee74eafbb0de1d32f6c8c4c6fd Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 9 Oct 2018 22:57:45 -0700 Subject: [PATCH 182/411] Remove python shebang line from gen_git_source. 
PiperOrigin-RevId: 216479972 --- tensorflow/tensorflow.bzl | 4 ++-- tensorflow/tools/git/BUILD | 6 ++++-- tensorflow/tools/git/gen_git_source.py | 1 - 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index cad5de1b0c..df15914233 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1967,9 +1967,9 @@ def tf_version_info_genrule(): ], outs = ["util/version_info.cc"], cmd = - "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}", + "$(location //tensorflow/tools/git:gen_git_source) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}", local = 1, - tools = [clean_dep("//tensorflow/tools/git:gen_git_source.py")], + tools = [clean_dep("//tensorflow/tools/git:gen_git_source")], ) def tf_py_build_info_genrule(): diff --git a/tensorflow/tools/git/BUILD b/tensorflow/tools/git/BUILD index daa17fbd50..34a5167948 100644 --- a/tensorflow/tools/git/BUILD +++ b/tensorflow/tools/git/BUILD @@ -6,6 +6,8 @@ package(default_visibility = ["//tensorflow:internal"]) licenses(["notice"]) # Apache 2.0 -exports_files( - ["gen_git_source.py"], +py_binary( + name = "gen_git_source", + srcs = ["gen_git_source.py"], + srcs_version = "PY2AND3", ) diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index cc2288a7fa..8e7cd9b104 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); -- GitLab From 5a2d98f7f7cf6f52eb0496bf27be07d9e1f29040 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 23:57:17 -0700 Subject: [PATCH 183/411] Run while loop test that was not being run before. 
PiperOrigin-RevId: 216483744 --- tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index baea5c0f6d..a5f85b97f7 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -1116,8 +1116,8 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(10.0, r.eval()) def testWhile_Gpu_2(self): - self._testWhile_Gpu_1(use_gpu=False) - self._testWhile_Gpu_1(use_gpu=True) + self._testWhile_Gpu_2(use_gpu=False) + self._testWhile_Gpu_2(use_gpu=True) def testWhileShape(self): with self.cached_session(): -- GitLab From 1409ea9dbd8275dcbd394451d2cb878e0e873d45 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Oct 2018 23:57:18 -0700 Subject: [PATCH 184/411] Delete dead code in batch_scatter_ops_test. PiperOrigin-RevId: 216483746 --- .../python/kernel_tests/batch_scatter_ops_test.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tensorflow/python/kernel_tests/batch_scatter_ops_test.py b/tensorflow/python/kernel_tests/batch_scatter_ops_test.py index 0d41a7e3b3..498e5f05a3 100644 --- a/tensorflow/python/kernel_tests/batch_scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/batch_scatter_ops_test.py @@ -73,16 +73,6 @@ class ScatterTest(test.TestCase): tf_scatter(ref, indices, updates).eval() self.assertAllClose(ref.eval(), new) - def _VariableRankTests(self, - tf_scatter): - vtypes = [np.float32, np.float64] - if tf_scatter != state_ops.scatter_div: - vtypes.append(np.int32) - - for vtype in vtypes: - for itype in (np.int32, np.int64): - self._VariableRankTest(tf_scatter, vtype, itype) - def testVariableRankUpdate(self): vtypes = [np.float32, np.float64] for vtype in vtypes: -- GitLab From 7575e0949703a4dd0ec19e51e568e9abba037728 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 10 Oct 2018 02:01:57 -0700 Subject: [PATCH 185/411] compat: Update forward compatibility horizon to 2018-10-10 PiperOrigin-RevId: 216495091 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 0e14c0e044..b7a1fce586 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 9) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 10) @tf_export("compat.forward_compatible") -- GitLab From ee7c9597f4ab8e586e921f9fe3e3c1383417169c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 02:22:32 -0700 Subject: [PATCH 186/411] Emit xla::Or in TensorArrayScatterV3 for PRED types instead of xla::Add Previously we emitted xla::Add, which isn't supported by some XLA backends on PRED types.
PiperOrigin-RevId: 216497939 --- .../compiler/tests/tensor_array_ops_test.py | 37 +++++++++++++++++-- .../tf2xla/kernels/tensor_array_ops.cc | 26 +++++++++---- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py index 78244d0b36..46ca371c8a 100644 --- a/tensorflow/compiler/tests/tensor_array_ops_test.py +++ b/tensorflow/compiler/tests/tensor_array_ops_test.py @@ -920,6 +920,34 @@ class TensorArrayTest(xla_test.XLATestCase): def testTensorArrayEvalEmptyWithDefault(self): self._testTensorArrayEvalEmptyWithDefault() + def _testTensorArrayScatterRead(self, tf_dtype): + with self.cached_session() as session, self.test_scope(): + convert = _make_converter(tf_dtype) + + ta = tensor_array_ops.TensorArray( + dtype=tf_dtype, + tensor_array_name="foo", + size=10) + + indices = constant_op.constant([1, 8]) + value = constant_op.constant(convert([[1.0, -1.0], [10.0, -10.0]])) + id0 = array_ops.placeholder(dtypes.int32) + id1 = array_ops.placeholder(dtypes.int32) + + w = ta.scatter(indices, value) + r0 = w.read(id0) + r1 = w.read(id1) + + # Test aggregation of read + read_vals = session.run([r0, r1], feed_dict={id0: 1, id1: 8}) + self.assertAllEqual(convert([1.0, -1.0]), read_vals[0]) + self.assertAllEqual(convert([10.0, -10.0]), read_vals[1]) + + def testTensorArrayScatterRead(self): + for dtype in self.numeric_tf_types: + self._testTensorArrayScatterRead(dtype) + self._testTensorArrayScatterRead(dtypes.bool) + def testTensorArrayScatterReadAndGradients(self): with self.cached_session() as session, self.test_scope(): ta = tensor_array_ops.TensorArray( @@ -929,15 +957,18 @@ class TensorArrayTest(xla_test.XLATestCase): indices = constant_op.constant([1, 8]) value = constant_op.constant([[1.0, -1.0], [10.0, -10.0]]) + id0 = array_ops.placeholder(dtypes.int32) + id1 = array_ops.placeholder(dtypes.int32) w = ta.scatter(indices, value) - r0 = w.read(1) - r1 = w.read(8) 
+ r0 = w.read(id0) + r1 = w.read(id1) # Test combined gradients + aggregation of read(0). grad = gradients_impl.gradients( ys=[r0, r1], xs=[value], grad_ys=[[2.0, 3.0], [4.0, 5.0]]) - read_vals, grad_vals = session.run([[r0, r1], grad]) + read_vals, grad_vals = session.run([[r0, r1], grad], + feed_dict={id0: 1, id1: 8}) self.assertEqual(len(read_vals), 2) self.assertEqual(len(grad_vals), 1) diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc index 94108b764f..6cdfaf4d97 100644 --- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc @@ -123,9 +123,10 @@ Status GetTensorArrayShape(const XlaResource* resource, xla::XlaOp DynamicAddSlice(xla::XlaBuilder* builder, const xla::XlaOp& operand, const xla::XlaOp& update, absl::Span update_dims, - const xla::XlaOp& start_indices) { + const xla::XlaOp& start_indices, DataType dtype) { xla::XlaOp current = xla::DynamicSlice(operand, start_indices, update_dims); - xla::XlaOp sum = xla::Add(current, update); + xla::XlaOp sum = + dtype == DT_BOOL ? 
xla::Or(current, update) : xla::Add(current, update); return xla::DynamicUpdateSlice(operand, sum, start_indices); } @@ -222,8 +223,8 @@ class TensorArrayWriteOp : public XlaOpKernel { slice_shape.InsertDim(0, 1LL); auto update = xla::Reshape(value, slice_shape.dim_sizes()); - xla::XlaOp written = - DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), start_indices); + xla::XlaOp written = DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), + start_indices, dtype_); OP_REQUIRES_OK(ctx, resource->SetValue(written)); ctx->SetOutput(0, flow); @@ -391,7 +392,11 @@ class TensorArrayScatterOp : public XlaOpKernel { } if (scatter_all_elements_in_order) { - ta = xla::Add(ta, value); + if (dtype_ == DT_BOOL) { + ta = xla::Or(ta, value); + } else { + ta = xla::Add(ta, value); + } } else { auto slice_dims = value_shape.dim_sizes(); slice_dims[0] = 1LL; @@ -414,7 +419,7 @@ class TensorArrayScatterOp : public XlaOpKernel { auto start_indices = xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0(b, 0), xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}})); - ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices); + ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices, dtype_); } } @@ -522,8 +527,13 @@ class TensorArraySplitOp : public XlaOpKernel { value_shape.DebugString(), " vs. ", ta_shape.DebugString())); - OP_REQUIRES_OK(ctx, resource->SetValue(xla::Add( - ta, xla::Reshape(value, ta_shape.dim_sizes())))); + const xla::XlaOp reshape = xla::Reshape(value, ta_shape.dim_sizes()); + if (dtype_ == DT_BOOL) { + ta = xla::Or(ta, reshape); + } else { + ta = xla::Add(ta, reshape); + } + OP_REQUIRES_OK(ctx, resource->SetValue(ta)); ctx->SetOutput(0, flow); } -- GitLab From dd7d31fa7bfa357e58987c2f3881d99c8050b6de Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 02:29:11 -0700 Subject: [PATCH 187/411] Change user_set to an absl::flat_hash_set in HloInstruction. 
absl::flat_hash_set have better performance than a std::unordered_set, which can improve overall compile time. PiperOrigin-RevId: 216498767 --- tensorflow/compiler/xla/service/hlo_instruction.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 93ff04b1e4..81fe1d0a9a 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -28,11 +28,10 @@ limitations under the License. #include #include #include -#include -#include #include #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" @@ -1645,7 +1644,7 @@ class HloInstruction { // members. The set enables fast membership testing and the vector enables // fast, stable iteration. std::vector users_; - std::unordered_set user_set_; + absl::flat_hash_set user_set_; // The set of control successors of this instruction. std::vector control_successors_; -- GitLab From d6a3d6a8295359364c86aecc479e6392bcde0ce4 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 10 Oct 2018 02:42:39 -0700 Subject: [PATCH 188/411] Automated rollback of commit 950cf87104bfee28e2165fe368f66337b8a1336d PiperOrigin-RevId: 216500702 --- tensorflow/core/graph/graph.cc | 2 +- .../optimizers/data/vectorization/BUILD | 34 ++-- .../data/vectorization/add_vectorizer.cc | 150 ------------------ .../optimizers/data/vectorization_utils.cc | 21 ++- .../data/vectorization_utils_test.cc | 103 ++---------- .../optimization/map_vectorization_test.py | 1 - 6 files changed, 31 insertions(+), 280 deletions(-) delete mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index a17491d4f7..6f068546d2 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -34,7 +34,7 @@ namespace tensorflow { const int Graph::kControlSlot = -1; -struct NodeProperties { +class NodeProperties { public: NodeProperties(const OpDef* op_def, const NodeDef& node_def, const DataTypeSlice inputs, const DataTypeSlice outputs) diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD index 09018d0124..985d6c6c3a 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD +++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD @@ -9,11 +9,7 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all") VECTORIZER_DEPS = [ ":vectorizer_registry", - "//tensorflow/cc:ops", "//tensorflow/core/grappler/optimizers/data:graph_utils", - "//tensorflow/core:core_cpu", - "//tensorflow/cc:scope_internal", - "//tensorflow/cc:cc_ops", ] + tf_protos_all() cc_library( @@ -46,24 +42,6 @@ cc_library( ], ) -tf_cc_test( - name = "vectorizer_registry_test", - srcs = ["vectorizer_registry_test.cc"], - deps = [ - ":vectorizer_registry", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ] + 
tf_protos_all(), -) - -cc_library( - name = "add_vectorizer", - srcs = ["add_vectorizer.cc"], - deps = VECTORIZER_DEPS, - alwayslink = 1, -) - cc_library( name = "cast_vectorizer", srcs = ["cast_vectorizer.cc"], @@ -83,10 +61,20 @@ cc_library( hdrs = ["vectorizer_registry.h"], visibility = ["//visibility:public"], deps = [ - ":add_vectorizer", ":cast_vectorizer", ":unpack_vectorizer", ":vectorizer", ":vectorizer_registry", ], ) + +tf_cc_test( + name = "vectorizer_registry_test", + srcs = ["vectorizer_registry_test.cc"], + deps = [ + ":vectorizer_registry", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ] + tf_protos_all(), +) diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc deleted file mode 100644 index d90a51b01a..0000000000 --- a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/cc/framework/ops.h" -#include "tensorflow/cc/framework/scope_internal.h" -#include "tensorflow/cc/ops/array_ops.h" -#include "tensorflow/cc/ops/math_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/core/graph/node_builder.h" -#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h" - -namespace tensorflow { -namespace grappler { - -namespace { - -const char* const kExpandDimsPrefix = "vectorized/expanddims/"; - -// Reshapes stacked inputs for broadcast. Stacked inputs have an extra leading -// dimension, which may cause automatic broadcasting rules to expand the -// input dimensions wrongly when the unstacked shapes have different ranks. -// To avoid that, we reshape stacked inputs to the maximum rank they need -// to be broadcasted to. -// -// For example, suppose we have inputs A and B, where A is a stacked tensor with -// shape [n, 5] (where n is the stack size) and B is an unstacked tensor with -// shape [12, 7, 5]. If we added them directly, tensorflow broadcasting rules -// would expand the dimensions of A to [1, n, 5], then (incorrectly) check that -// the dimensions n and 7 are compatible, and if so, create an output of shape -// [12, 7, 5]. However, correct addition of these inputs would create an output -// with shape [n, 12, 7, 5]: we need to manually expand the dimensions of A -// *after* the leading dimension, i.e. expand A to the shape [n, 1, 1, 5] before -// broadcasting. 
-Status ExpandDimsForBroadcast(std::vector* inputs, Graph* g) { - Status status; - Scope parent = NewInternalScope(g, &status, nullptr); - Scope s = parent.NewSubScope(kExpandDimsPrefix); - - // TODO(rachelim): We can potentially get rid of all these ops if shapes are - // known statically - - Output const_0 = ops::Const(s, 0); - Output const_1 = ops::Const(s, 1); - - std::vector ranks; - ranks.reserve(inputs->size()); - - // Get the stacked rank of each input - for (const auto& input : *inputs) { - Output rank = ops::Rank(s, Output(input.node, input.output_index)); - - if (!input.stacked) { - // If the input is unstacked, add 1 - rank = ops::Add(s, rank, const_1); - } - - ranks.push_back(rank); - } - - // Pack the ranks into one tensor to get the max - Output packed_ranks = ops::Stack(s, ranks); - - Output max_rank = - ops::Max(s, packed_ranks, const_0, ops::Max::Attrs().KeepDims(true)); - - std::vector expanded_inputs; - expanded_inputs.reserve(inputs->size()); - - // For all inputs that are stacked, expand dimensions after dim 0. 
- for (size_t i = 0; i < inputs->size(); ++i) { - if (!inputs->at(i).stacked) { - expanded_inputs.push_back(inputs->at(i)); - continue; - } - - Output input(inputs->at(i).node, inputs->at(i).output_index); - - // Number of dimensions to expand - Output rank_diff = ops::Sub(s, max_rank, ranks[i]); - - // [1] * rank_diff - Output ones = ops::Tile(s, ops::Const(s, {1}), rank_diff); - - Output const_vec_1 = ops::Const(s, {1}); - - Output shape = ops::Shape(s, input); - - // shape[:1] - Output concat_pre = - ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1, - ops::StridedSlice::Attrs().BeginMask(1)); - - // shape[1:] - Output concat_post = - ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1, - ops::StridedSlice::Attrs().EndMask(1)); - - // tf.concat([shape[:1], ones, shape[1:]], 0) - Output new_shape = ops::Concat(s, {concat_pre, ones, concat_post}, const_0); - - Output result = ops::Reshape(s, input, new_shape); - - expanded_inputs.push_back({result.node(), 0, true}); - } - - inputs->swap(expanded_inputs); - return status; -} - -class AddVectorizer : public Vectorizer { - public: - Status Vectorize(const Node& node, Graph* outer_scope, - std::vector&& inputs, - std::vector* outputs) override { - if (node.num_inputs() != 2) { - return errors::Internal("Add op should only have two inputs."); - } - - TF_RETURN_IF_ERROR(ExpandDimsForBroadcast(&inputs, outer_scope)); - - // Add new Add node with the same op and attrs as the original node - Node* new_add_node; - TF_RETURN_IF_ERROR(NodeBuilder("Add", "Add") - .Input(inputs[0].node, inputs[0].output_index) - .Input(inputs[1].node, inputs[1].output_index) - .Finalize(outer_scope, &new_add_node)); - - // Add output mappings - outputs->push_back({new_add_node, 0, true}); - return Status::OK(); - } -}; - -REGISTER_VECTORIZER("Add", AddVectorizer); - -} // namespace -} // namespace grappler -} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc 
b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc index 8b93b1f2b8..d977ff3198 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc @@ -64,18 +64,9 @@ void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src, } } -// Update node attrs to keep its properties consistent with the function -void UpdateMapDefunAttrs(FunctionBody* map_defun_fn, Node* map_defun_node) { - map_defun_node->AddAttr("output_types", map_defun_fn->ret_types); - - // TODO(rachelim): Propagate precise shapes if they're known, which may enable - // subsequent optimizations. - map_defun_node->AddAttr("output_shapes", std::vector( - map_defun_fn->ret_types.size())); -} - Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node, const TensorDesc& output) { + // Note that we don't update MapDefun attrs as we go, only when we are done DataType type = output.first->output_type(output.second); int index = map_defun_fn->ret_nodes.size(); @@ -92,13 +83,13 @@ Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node, map_defun_fn->graph->AddEdge(output.first, output.second, ret_node, 0); map_defun_fn->ret_nodes.push_back(ret_node); map_defun_fn->ret_types.push_back(type); - UpdateMapDefunAttrs(map_defun_fn, map_defun_node); return s; } void RemoveMapDefunOutput(int output_position, Graph* outer_scope, FunctionBody* map_defun_fn, Node* map_defun_node) { + // Note that we don't update MapDefun attrs as we go, only when we are done DCHECK_LT(output_position, map_defun_fn->ret_nodes.size()) << "Trying to remove output that doesn't exist. 
Output number: " << output_position; @@ -111,7 +102,6 @@ void RemoveMapDefunOutput(int output_position, Graph* outer_scope, output_position); map_defun_fn->ret_types.erase(map_defun_fn->ret_types.begin() + output_position); - UpdateMapDefunAttrs(map_defun_fn, map_defun_node); // Renumber the nodes and edges that come after for (int i = 0; i < num_later_outputs; ++i) { @@ -352,6 +342,13 @@ void Vectorization::VectorizeHelper() { // need the MapDefun node and can delete it. if (map_defun_fn_->ret_nodes.empty()) { outer_scope_->RemoveNode(map_defun_node_); + } else { + // Update MapDefun node attrs accordingly + DCHECK_EQ(map_defun_fn_->ret_types.size(), map_defun_fn_->ret_nodes.size()); + map_defun_node_->AddAttr( + "output_shapes", + std::vector(map_defun_fn_->ret_types.size())); + map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types); } } diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc index be498d150b..a6020e36bb 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc @@ -145,7 +145,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) { FunctionDef* vectorized; Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized); LOG(ERROR) << s; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); EXPECT_EQ(GetRetval(*vectorized, 0), "ret0"); @@ -237,7 +237,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); auto map_defun_node = 
vectorized->node_def( function_utils::FindFunctionNodeWithOp("MapDefun", *vectorized)); @@ -311,7 +311,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -389,7 +389,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -475,7 +475,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& unpack_node = vectorized->node_def( @@ -574,7 +574,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -654,7 +654,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) { *lib.add_function() = outer; 
*lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); // They should be unchanged // We check this somewhat manually as the names of nodes may have changed EXPECT_EQ(vectorized->node_def_size(), 1); @@ -738,7 +738,7 @@ TEST(VectorizeMapDefunTest, VectorizeConst) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized)); @@ -817,7 +817,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); auto const_node = vectorized->node_def( @@ -902,7 +902,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) { *lib.add_function() = inner; FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); auto find_const = [vectorized](int val) -> const NodeDef* { for (const auto& n : vectorized->node_def()) { @@ -924,89 +924,6 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) { EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name())); } -// Before: -// -// +------+ -// +-----------------+ Arg0 +----------------------+ -// | +---+--+ | -// | | | -// | +---v--+ | -// | +-------------+ Arg0 +------------------+ | -// | | +---+--+ | | -// | | | | 
| -// | | | +-----+ | | -// | | | |Const| | | -// | | | +-+---+ | | -// | | | | | | -// | | | +--------+ | | -// | | | | | | -// | | +-v---v-+ | | -// | | | Add | | | -// | | +-+-----+ | | -// | | | | | -// | | | | | -// | | MapDefun +-v----+ | | -// | +---------------| Ret |----------------+ | -// | +--v---+ | -// | | | -// | | | -// | +--v---- | -// +-------------------| Ret |--------------------+ -// +------+ -// -// -// After: -// -// +------+ -// +------------+ Arg0 +----------------------+ -// | +---+--+ | -// | | | -// | | +-----+ | -// | | |Const| | -// | +-v---------+ +--+--+ | -// | |ExpandDims*| | | -// | +-----+-----+ | | -// | | | | -// | +-----+ +-----+ | -// | | | | -// | +-v-v-+ | -// | | Add | | -// | +--+--+ | -// | | | -// | +---v--+ | -// +-----------------------+ Ret +-----------+ -// +------+ -// -TEST(VectorizeMapDefunTest, VectorizeDefunAdd) { - // Note that this checks that the "Add" vectorizer is successful, but does not - // check that the transformed function is correct (i.e. produces the same - // output as the unvectorized map defun). 
For the latter, the tests are in - // tensorflow/python/data/experimental/kernel_tests/optimization/ - // map_vectorization_test.py - FunctionDef inner = FunctionDefHelper::Create( - "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */}, - {/* nodes */ FunctionDefHelper::Const("Const", 2), - {{"Add"}, "Add", {"arg0", "Const:output:0"}, {{"T", DT_INT32}}}}, - {{"ret0", "Add:z:0"}}); - - FunctionDef outer = FunctionDefHelper::Create( - "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"}, - {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}}); - - NodeDef* map_defun = - AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}}, - inner.signature().name(), &outer); - CHECK_NOTNULL(map_defun); - - FunctionDefLibrary lib; - *lib.add_function() = outer; - *lib.add_function() = inner; - FunctionDef* vectorized; - TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); - EXPECT_TRUE( - !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); -} - // TODO(rachelim): More test cases when we get around to implementing them: // [] A badly defined converter, e.g. 
doesn't produce nodes that have the // same number of outputs/inputs as the nodes to be converted diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py index d1d6cf28ab..803ff87924 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py @@ -80,7 +80,6 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase): ("Basic", lambda x: (x, x + 1), None), ("Const", lambda x: 2, 12), ("Parallel", lambda x: (x, x + 1), 12), - ("Broadcast", lambda x: x + np.random.rand(5, 4, 3, 2), None), ("Gather", lambda x: array_ops.gather(x, 0), 12), ) def testOptimization(self, map_fn, num_parallel_calls): -- GitLab From 028ca321cb7b476868dcb39585d5cd361d81f05f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 03:29:54 -0700 Subject: [PATCH 189/411] Support removing side effecting instructions with RemoveInstructionAndUnusedOperands If the caller explicitly asks to remove a side effceting instruction (e.g. all-reduce) then we should respect it instead of silently ignoring the request. 
PiperOrigin-RevId: 216505133 --- tensorflow/compiler/xla/service/hlo_computation.cc | 2 +- tensorflow/compiler/xla/service/hlo_computation.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index c2041c4667..b0f7cd91ad 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -215,7 +215,7 @@ Status HloComputation::RemoveInstructionAndUnusedOperands( if (removed.count(item) != 0 || item->user_count() != 0 || item == root_instruction() || !IsRemovable(item) || - item->HasSideEffect()) { + (item->HasSideEffect() && item != instruction)) { continue; } for (int i = 0; i < item->operand_count(); ++i) { diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index d87ab4bda1..dec96d11a9 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -128,9 +128,10 @@ class HloComputation { // users. Instruction is deallocated with this call. Status RemoveInstruction(HloInstruction* instruction); - // Remove an instruction from the computation and also transitively any - // operand that has no users post removing an instruction. The instruction - // must have no users. Instruction is deallocated with this call. + // Remove an instruction (including side effecting ones) from the computation + // and also transitively any operand that has no side effect and no users post + // removing an instruction. The instruction must have no users. Instruction is + // deallocated with this call. Status RemoveInstructionAndUnusedOperands(HloInstruction* instruction); // Set the root of the computation to the given instruction. The instruction -- GitLab From e851764c24e5ac5f527a7ce2ce12050edddeb209 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 10 Oct 2018 07:17:04 -0700 Subject: [PATCH 190/411] Support kDomain instructions in the HloMatcher framework PiperOrigin-RevId: 216525613 --- tensorflow/compiler/xla/service/hlo_matchers.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index 5502e565b6..b05a012b4a 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -179,6 +179,7 @@ HLO_MATCHER(Convolution); HLO_MATCHER(Copy); HLO_MATCHER(CrossReplicaSum); HLO_MATCHER(Divide); +HLO_MATCHER(Domain); HLO_MATCHER(DynamicSlice); HLO_MATCHER(DynamicUpdateSlice); HLO_MATCHER(Eq); -- GitLab From 93226f635c5c108b3b501d8bbcf27e64dec49fb9 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 10 Oct 2018 07:38:42 -0700 Subject: [PATCH 191/411] Use overloaded operators for the assert statement. This should remove the reliance on importing tensorflow in the generated code. 
PiperOrigin-RevId: 216528047 --- .../python/autograph/converters/asserts.py | 2 +- .../autograph/converters/asserts_test.py | 24 +++-- tensorflow/python/autograph/operators/BUILD | 11 +++ .../python/autograph/operators/__init__.py | 1 + .../python/autograph/operators/exceptions.py | 86 ++++++++++++++++++ .../autograph/operators/exceptions_test.py | 87 +++++++++++++++++++ 6 files changed, 201 insertions(+), 10 deletions(-) create mode 100644 tensorflow/python/autograph/operators/exceptions.py create mode 100644 tensorflow/python/autograph/operators/exceptions_test.py diff --git a/tensorflow/python/autograph/converters/asserts.py b/tensorflow/python/autograph/converters/asserts.py index 56a97534c4..4ba827c35f 100644 --- a/tensorflow/python/autograph/converters/asserts.py +++ b/tensorflow/python/autograph/converters/asserts.py @@ -33,7 +33,7 @@ class AssertTransformer(converter.Base): # Note: The lone tf.Assert call will be wrapped with control_dependencies # by side_effect_guards. template = """ - tf.Assert(test, (msg,)) + ag__.assert_stmt(test, lambda: msg) """ if node.msg is None: diff --git a/tensorflow/python/autograph/converters/asserts_test.py b/tensorflow/python/autograph/converters/asserts_test.py index 01282f9e62..eef628aeb6 100644 --- a/tensorflow/python/autograph/converters/asserts_test.py +++ b/tensorflow/python/autograph/converters/asserts_test.py @@ -18,24 +18,30 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import gast - from tensorflow.python.autograph.converters import asserts +from tensorflow.python.autograph.converters import side_effect_guards from tensorflow.python.autograph.core import converter_testing +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors_impl +from tensorflow.python.ops import gen_control_flow_ops from tensorflow.python.platform import test class AssertsTest(converter_testing.TestCase): - def test_transform(self): 
+ def test_basic(self): def test_fn(a): - assert a > 0 - - node, ctx = self.prepare(test_fn, {}) - node = asserts.transform(node, ctx) - - self.assertTrue(isinstance(node.body[0].value, gast.Call)) + assert a, 'test message' + return tf.no_op() # pylint:disable=undefined-variable + + with self.converted(test_fn, (asserts, side_effect_guards), {}, + gen_control_flow_ops.no_op) as result: + with self.cached_session() as sess: + op = result.test_fn(constant_op.constant(False)) + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + 'test message'): + sess.run(op) if __name__ == '__main__': diff --git a/tensorflow/python/autograph/operators/BUILD b/tensorflow/python/autograph/operators/BUILD index a116611b64..f422911377 100644 --- a/tensorflow/python/autograph/operators/BUILD +++ b/tensorflow/python/autograph/operators/BUILD @@ -22,6 +22,7 @@ py_library( "__init__.py", "control_flow.py", "data_structures.py", + "exceptions.py", "py_builtins.py", "slices.py", ], @@ -62,6 +63,16 @@ py_test( ], ) +py_test( + name = "exceptions_test", + srcs = ["exceptions_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":operators", + "//tensorflow/python:client_testlib", + ], +) + py_test( name = "py_builtins_test", srcs = ["py_builtins_test.py"], diff --git a/tensorflow/python/autograph/operators/__init__.py b/tensorflow/python/autograph/operators/__init__.py index 0d3b44b6c4..53f4b0ddc8 100644 --- a/tensorflow/python/autograph/operators/__init__.py +++ b/tensorflow/python/autograph/operators/__init__.py @@ -45,6 +45,7 @@ from tensorflow.python.autograph.operators.data_structures import list_stack from tensorflow.python.autograph.operators.data_structures import ListPopOpts from tensorflow.python.autograph.operators.data_structures import ListStackOpts from tensorflow.python.autograph.operators.data_structures import new_list +from tensorflow.python.autograph.operators.exceptions import assert_stmt from tensorflow.python.autograph.operators.py_builtins import float_ from 
tensorflow.python.autograph.operators.py_builtins import int_ from tensorflow.python.autograph.operators.py_builtins import len_ diff --git a/tensorflow/python/autograph/operators/exceptions.py b/tensorflow/python/autograph/operators/exceptions.py new file mode 100644 index 0000000000..6078160f68 --- /dev/null +++ b/tensorflow/python/autograph/operators/exceptions.py @@ -0,0 +1,86 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Exception handling statements: assert, etc.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.util import tf_inspect + + +def assert_stmt(expression1, expression2): + """Functional form of an assert statement. + + This follows the semantics of the Python assert statement, however the + concrete implementations may deviate from it. See the respective + implementation for details. + + In general, the assert statement should not be used for control flow. + Furthermore, it is encouraged that the assertion expressions should not have + side effects. + + Args: + expression1: Any + expression2: Callable[[], Any], returns the expression to include in the + error message when expression1 evaluates to False. 
When expression1 is + True, the result of expression2 will not be evaluated, however, + expression2 itself may be evaluated in some implementations. + + Returns: + Any, implementation-dependent. + + Raises: + ValueError: if any arguments are illegal. + """ + if not callable(expression2): + raise ValueError('{} must be a callable'.format(expression2)) + args, _, keywords, _ = tf_inspect.getargspec(expression2) + if args or keywords: + raise ValueError('{} may not have any arguments'.format(expression2)) + + if tensor_util.is_tensor(expression1): + return _tf_assert_stmt(expression1, expression2) + else: + return _py_assert_stmt(expression1, expression2) + + +def _tf_assert_stmt(expression1, expression2): + """Overload of assert_stmt that stages a TF Assert. + + This implementation deviates from Python semantics as follows: + (1) the assertion is verified regardless of the state of __debug__ + (2) on assertion failure, the graph execution will fail with + tensorflow.errors.ValueError, rather than AssertionError. + + Args: + expression1: tensorflow.Tensor, must evaluate to a tf.bool scalar + expression2: Callable[[], Union[tensorflow.Tensor, List[tensorflow.Tensor]]] + + Returns: + tensorflow.Operation + """ + expression2_tensors = expression2() + if not isinstance(expression2_tensors, list): + expression2_tensors = [expression2_tensors] + return control_flow_ops.Assert(expression1, expression2_tensors) + + +def _py_assert_stmt(expression1, expression2): + """Overload of assert_stmt that executes a Python assert statement.""" + assert expression1, expression2() + return None diff --git a/tensorflow/python/autograph/operators/exceptions_test.py b/tensorflow/python/autograph/operators/exceptions_test.py new file mode 100644 index 0000000000..186535d05b --- /dev/null +++ b/tensorflow/python/autograph/operators/exceptions_test.py @@ -0,0 +1,87 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for exceptions module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.autograph.operators import exceptions +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors_impl +from tensorflow.python.platform import test + + +class ExceptionsTest(test.TestCase): + + def test_assert_tf_untriggered(self): + with self.cached_session() as sess: + t = exceptions.assert_stmt( + constant_op.constant(True), lambda: constant_op.constant('ignored')) + sess.run(t) + + def test_assert_tf_triggered(self): + with self.cached_session() as sess: + t = exceptions.assert_stmt( + constant_op.constant(False), + lambda: constant_op.constant('test message')) + + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + 'test message'): + sess.run(t) + + def test_assert_tf_multiple_printed_values(self): + two_tensors = [ + constant_op.constant('test message'), + constant_op.constant('another message') + ] + with self.cached_session() as sess: + t = exceptions.assert_stmt( + constant_op.constant(False), lambda: two_tensors) + + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + 'test message.*another message'): + sess.run(t) + + def test_assert_python_untriggered(self): + side_effect_trace = [] + + def 
expression_with_side_effects(): + side_effect_trace.append(object()) + return 'test message' + + exceptions.assert_stmt(True, expression_with_side_effects) + + self.assertListEqual(side_effect_trace, []) + + def test_assert_python_triggered(self): + if not __debug__: + # Python assertions only be tested when in debug mode. + return + + side_effect_trace = [] + tracer = object() + + def expression_with_side_effects(): + side_effect_trace.append(tracer) + return 'test message' + + with self.assertRaisesRegexp(AssertionError, 'test message'): + exceptions.assert_stmt(False, expression_with_side_effects) + self.assertListEqual(side_effect_trace, [tracer]) + + +if __name__ == '__main__': + test.main() -- GitLab From 0bb68afa38cf5c45232e85fb09186e01055e4d11 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 08:01:45 -0700 Subject: [PATCH 192/411] Fix number of outputs when importing tensorflow GraphDef. Sometimes the actual number of outputs is dictated by one of the attributes of the NodeDef. PiperOrigin-RevId: 216530696 --- .../contrib/lite/toco/import_tensorflow.cc | 22 ++++++++++--- .../lite/toco/import_tensorflow_test.cc | 31 +++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 6b195cc992..ff67b306e0 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1122,13 +1122,27 @@ tensorflow::Status ConvertUnsupportedOperator( op->inputs.push_back(node.input(i)); } - // Parse outputs. - op->outputs.push_back(node.name()); // Implicit :0. + // Parse outputs. Name them after the node's name, plus an ordinal suffix. + // Note that some outputs are to be multipled by a named attribute. 
const tensorflow::OpDef* op_def = nullptr; if (tensorflow::OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) { - for (int i = 1; i < op_def->output_arg_size(); ++i) { - op->outputs.push_back(absl::StrCat(node.name(), ":", i)); + int next_output = 0; + for (int i = 0; i < op_def->output_arg_size(); ++i) { + string multiples = op_def->output_arg(i).number_attr(); + int num_outputs = multiples.empty() ? 1 : GetIntAttr(node, multiples); + LOG(INFO) << "dddddddd " << num_outputs; + for (int j = 0; j < num_outputs; ++j) { + if (next_output == 0) { + op->outputs.push_back(node.name()); // Implicit :0. + } else { + op->outputs.push_back(absl::StrCat(node.name(), ":", next_output)); + } + ++next_output; + } } + } else { + LOG(INFO) << "nodef!!!!!!!!!!! "; + op->outputs.push_back(node.name()); // Implicit :0. } // Parse if the op supports quantization diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc index cd9a144b52..0767221b83 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc @@ -55,6 +55,13 @@ Status ImportNode(const NodeDef& node, Model* model) { converter); } +Status ImportFlexNode(const NodeDef& node, Model* model) { + // Empty converter => all nodes are flex nodes. + const auto converter = internal::ConverterMapType(); + return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), model, + converter); +} + Status ImportNode(const NodeDef& node) { Model model; return ImportNode(node, &model); @@ -299,5 +306,29 @@ TEST(ImportTest, UnsupportedOpWithWildcardOutputShapes) { ASSERT_TRUE(op->output_shapes.empty()); } +TEST(ImportTest, UnsupportedOpWithMultipleOutputs) { + NodeDef node = BuildNode("Unpack", {}); + + // Unpack's OpDef has a single output which gets multiplied based on the + // "num" attribute of the NodeDef. + AttrValue value_attr; + SetAttrValue(3, &value_attr); // 3 outputs. 
+ (*node.mutable_attr())["num"] = value_attr; + + Model model; + EXPECT_TRUE(ImportFlexNode(node, &model).ok()); + + ASSERT_THAT(model.operators.size(), ::testing::Ge(1)); + ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported); + const TensorFlowUnsupportedOperator* op = + static_cast( + model.operators[0].get()); + + ASSERT_EQ(op->outputs.size(), 3); + ASSERT_EQ(op->outputs[0], "Node1"); + ASSERT_EQ(op->outputs[1], "Node1:1"); + ASSERT_EQ(op->outputs[2], "Node1:2"); +} + } // namespace } // namespace toco -- GitLab From 1ae0a45a5de65ab4ae6def232da016e7ee32773c Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 10 Oct 2018 08:12:24 -0700 Subject: [PATCH 193/411] [tf.data] `Dataset.make_one_shot_iterator()` inherits the random seed from the calling graph. This change makes a subtle difference to the behavior of existing programs that create multiple iterators. Previously, one-shot iterators would not inherit the graph seed, and so their values would be non-deterministic (unless explicit seeds were set). After this change, an iterator will inherit its seed from the outer graph. Multiple one-shot iterators created from the same dataset will inherit different seeds, matching the semantics of creating multiple ops with the same graph seed. 
PiperOrigin-RevId: 216532256 --- .../kernel_tests/shuffle_dataset_op_test.py | 32 +++++++++++++++++++ tensorflow/python/data/ops/dataset_ops.py | 13 ++++++++ 2 files changed, 45 insertions(+) diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py index 8694f58a24..cad28f860e 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py @@ -241,6 +241,38 @@ class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertAllEqual(results[0], results[1]) + @parameterized.named_parameters( + ("ReshuffleOneShot", True, False), + ("ReshuffleInitializable", True, True), + ("NoReshuffleOneShot", False, False), + ("NoReshuffleInitializable", False, True), + ) + def testMultipleIterators(self, reshuffle, initializable): + with ops.Graph().as_default() as g: + dataset = dataset_ops.Dataset.range(100).shuffle( + 10, reshuffle_each_iteration=reshuffle).repeat(3) + + if initializable: + iterators = [dataset.make_initializable_iterator() for _ in range(2)] + else: + iterators = [dataset.make_one_shot_iterator() for _ in range(2)] + + results = [] + with self.session(graph=g) as sess: + for iterator in iterators: + if initializable: + sess.run(iterator.initializer) + next_element = iterator.get_next() + run_results = [] + for _ in range(300): + run_results.append(sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + results.append(run_results) + + self.assertNotEqual(results[0], results[1]) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 6195747671..cdb883cac9 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -34,6 +34,7 @@ from tensorflow.python.framework import constant_op from 
tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed as core_random_seed from tensorflow.python.framework import smart_cond from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape @@ -178,10 +179,21 @@ class Dataset(object): """ if context.executing_eagerly(): return iterator_ops.EagerIterator(self) + + graph_level_seed, op_level_seed = core_random_seed.get_seed(None) + # NOTE(mrry): We capture by value here to ensure that `_make_dataset()` is # a 0-argument function. @function.Defun(capture_by_value=True) def _make_dataset(): + # NOTE(mrry): `Defun` does not capture the graph-level seed from the + # enclosing graph, so if a graph-level seed is present we set the local + # graph seed based on a combination of the graph- and op-level seeds. + if graph_level_seed is not None: + assert op_level_seed is not None + core_random_seed.set_random_seed( + (graph_level_seed + 87654321 * op_level_seed) % (2 ** 63 - 1)) + dataset = self options = self.options() static_optimizations = options._static_optimizations() # pylint: disable=protected-access @@ -2265,6 +2277,7 @@ class ShuffleDataset(UnaryDataset): self._buffer_size = ops.convert_to_tensor( buffer_size, dtype=dtypes.int64, name="buffer_size") self._seed, self._seed2 = random_seed.get_seed(seed) + if reshuffle_each_iteration is None: self._reshuffle_each_iteration = True else: -- GitLab From afcc1a4452de7898391683f7cbb16ff548f839a1 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 10 Oct 2018 08:17:09 -0700 Subject: [PATCH 194/411] Allow the executor type for a function to be specified as an attr on a function. This change complements the existing `InstantiateOptions::executor_type` option, which takes precedence over the attr if both are provided. 
It enables the choice of executor to be separated from both the calling op implementation and the function definition, which simplifies the use of custom executors in operations that take a function as an attr (e.g.) `tf.data` and the functional control-flow ops. PiperOrigin-RevId: 216532778 --- tensorflow/core/common_runtime/function.cc | 2 +- .../core/common_runtime/function_test.cc | 38 +++++++++++++++++-- tensorflow/core/framework/function.cc | 24 ++++++++++-- tensorflow/core/framework/function.h | 7 ++++ 4 files changed, 62 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 472865ca43..e0e5f4a215 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -551,7 +551,7 @@ Status FunctionLibraryRuntimeImpl::Instantiate( item->func_graph = fbody; item->overlay_lib = options.overlay_lib; item->instantiation_counter = 1; - item->executor_type = options.executor_type; + item->executor_type = ExecutorType(options, attrs); items_.emplace(next_handle_, std::unique_ptr(item)); next_handle_++; } diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc index 7bab9be9a6..716167132b 100644 --- a/tensorflow/core/common_runtime/function_test.cc +++ b/tensorflow/core/common_runtime/function_test.cc @@ -584,7 +584,28 @@ TEST_F(FunctionLibraryRuntimeTest, ExecutorFactory) { "Internal: This is a dummy."); } - // Test that non-existent exector types trigger an error. + // Test that a non-default executor factory can be invoked via an attr. + { + FunctionLibraryRuntime::InstantiateOptions options; + HasError(InstantiateAndRun(flr0_, "XTimesTwo", + {{"T", DT_FLOAT}, {"_executor", "DUMMY"}}, + options, {x}, {&y}), + "Internal: This is a dummy."); + } + + // Test that a non-default executor factory specified via an + // `InstantiateOptions` supersedes the attr when both are present. 
+ { + FunctionLibraryRuntime::InstantiateOptions options; + options.executor_type = "DUMMY"; + HasError( + InstantiateAndRun(flr0_, "XTimesTwo", + {{"T", DT_FLOAT}, {"_executor", "UNKNOWN_EXECUTOR"}}, + options, {x}, {&y}), + "Internal: This is a dummy."); + } + + // Test that non-existent executor types trigger an error. { FunctionLibraryRuntime::InstantiateOptions options; options.executor_type = "UNKNOWN_EXECUTOR"; @@ -593,6 +614,15 @@ TEST_F(FunctionLibraryRuntimeTest, ExecutorFactory) { "Not found: No executor factory registered for the given executor " "type: UNKNOWN_EXECUTOR"); } + { + FunctionLibraryRuntime::InstantiateOptions options; + HasError( + InstantiateAndRun(flr0_, "XTimesTwo", + {{"T", DT_FLOAT}, {"_executor", "UNKNOWN_EXECUTOR"}}, + options, {x}, {&y}), + "Not found: No executor factory registered for the given executor " + "type: UNKNOWN_EXECUTOR"); + } } TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctions) { @@ -869,7 +899,7 @@ TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) { Scope s = Scope::NewRootScope(); auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); auto x4_x2_scale = ops::Const( - s.WithOpName("x4/x2/scale/_12__cf__10") + s.WithOpName("x4/x2/scale/_12__cf__13") .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 2.0f); auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale); @@ -1076,13 +1106,13 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) { auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1); auto scale = ops::Const( - s.WithOpName("scale/_6__cf__15") + s.WithOpName("scale/_6__cf__18") .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 2.0f); auto func1_gx = ops::Mul(s.WithOpName("Func/_1/gx"), func0, scale); auto func1_sx = ops::Shape(s.WithOpName("Func/_1/sx"), x); auto const0 = ops::Const( - s.WithOpName("Func/_1/sy/_5__cf__14") + s.WithOpName("Func/_1/sy/_5__cf__17") 
.WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 0, {0}); auto func1_rx = ops::internal::BroadcastGradientArgs( diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc index 20f957190b..aa2f274752 100644 --- a/tensorflow/core/framework/function.cc +++ b/tensorflow/core/framework/function.cc @@ -796,12 +796,28 @@ uint64 FunctionDefHash(const FunctionDef& fdef) { return h; } +static constexpr const char* const kExecutorAttr = "_executor"; + +/* static */ +string FunctionLibraryRuntime::ExecutorType(const InstantiateOptions& options, + AttrSlice attrs) { + if (!options.executor_type.empty()) { + return options.executor_type; + } else if (const AttrValue* executor_attr = attrs.Find(kExecutorAttr)) { + return executor_attr->s(); + } else { + return string(); + } +} + string Canonicalize(const string& funcname, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options) { std::vector entries; entries.reserve(options.target.empty() ? 
attrs.size() : (attrs.size() + 1)); for (auto p : attrs) { - entries.push_back(strings::StrCat(p.first, "=", Print(p.second))); + if (p.first != kExecutorAttr) { + entries.push_back(strings::StrCat(p.first, "=", Print(p.second))); + } } if (!options.target.empty()) { entries.push_back( @@ -815,9 +831,9 @@ string Canonicalize(const string& funcname, AttrSlice attrs, entries.push_back( strings::StrCat("_state_handle", "=", options.state_handle)); } - if (!options.executor_type.empty()) { - entries.push_back( - strings::StrCat("_executor_type", "=", options.executor_type)); + string executor_type = FunctionLibraryRuntime::ExecutorType(options, attrs); + if (!executor_type.empty()) { + entries.push_back(strings::StrCat(kExecutorAttr, "=", executor_type)); } std::sort(entries.begin(), entries.end()); return strings::StrCat(funcname, "[", str_util::Join(entries, ","), "]"); diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index 4d6d68e214..d4beca7e11 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -609,6 +609,13 @@ class FunctionLibraryRuntime { virtual Status Clone(std::unique_ptr* out_lib_def, std::unique_ptr* out_pflr, FunctionLibraryRuntime** out_flr) = 0; + + // Returns the name of the executor class (in the sense of + // `ExecutorFactory::GetFactory()`) that will be used based on the given + // dynamic `options` and static `attrs`. If none is specified, this method + // will return an empty string, which leaves the decision up to the runtime. + static string ExecutorType(const InstantiateOptions& options, + AttrSlice attrs); }; // Returns a canonicalized string for the instantiation of the -- GitLab From f146d586bf93b918d6f3e014b230abee49170a52 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 10 Oct 2018 08:24:25 -0700 Subject: [PATCH 195/411] Use lambdas when converting ifexps, since they are now supported. 
PiperOrigin-RevId: 216533613 --- .../converters/conditional_expressions.py | 97 +------------------ .../python/autograph/operators/__init__.py | 1 + 2 files changed, 4 insertions(+), 94 deletions(-) diff --git a/tensorflow/python/autograph/converters/conditional_expressions.py b/tensorflow/python/autograph/converters/conditional_expressions.py index 40728f555d..a4eef7e6a1 100644 --- a/tensorflow/python/autograph/converters/conditional_expressions.py +++ b/tensorflow/python/autograph/converters/conditional_expressions.py @@ -19,109 +19,18 @@ from __future__ import division from __future__ import print_function from tensorflow.python.autograph.core import converter -from tensorflow.python.autograph.pyct import anno from tensorflow.python.autograph.pyct import templates -from tensorflow.python.autograph.pyct.static_analysis.annos import NodeAnno - - -class _FunctionDefs(object): - - def __init__(self): - self.nodes = [] - - -class _Statement(object): - - def __init__(self): - self.scope = None class ConditionalExpressionTransformer(converter.Base): """Converts contitional expressions to functional form.""" - def _postprocess_statement(self, node): - """Inserts any separate functions that node may use.""" - replacements = [] - for def_node in self.state[_FunctionDefs].nodes: - replacements.extend(def_node) - replacements.append(node) - node = replacements - # The corresponding enter is called by self.visit_block (see _process_block) - self.state[_FunctionDefs].exit() - return node, None - - def _create_branch(self, expr, name_stem): - scope = self.state[_Statement].scope - name = self.ctx.namer.new_symbol(name_stem, scope.referenced) - template = """ - def name(): - return expr, - """ - node = templates.replace(template, name=name, expr=expr) - self.state[_FunctionDefs].nodes.append(node) - return name - def visit_IfExp(self, node): - if anno.hasanno(node.test, anno.Basic.QN): - name_root = anno.getanno(node.test, anno.Basic.QN).ssf() - else: - name_root = 'ifexp' - - 
true_fn_name = self._create_branch(node.body, '%s_true' % name_root) - false_fn_name = self._create_branch(node.orelse, '%s_false' % name_root) - return templates.replace_as_expression( - 'ag__.utils.run_cond(test, true_fn_name, false_fn_name)', + 'ag__.if_stmt(test, lambda: true_expr, lambda: false_expr)', test=node.test, - true_fn_name=true_fn_name, - false_fn_name=false_fn_name) - - def _process_block(self, scope, block): - self.state[_Statement].enter() - self.state[_Statement].scope = scope - block = self.visit_block( - block, - before_visit=self.state[_FunctionDefs].enter, - after_visit=self._postprocess_statement) - self.state[_Statement].exit() - return block - - def visit_FunctionDef(self, node): - node.args = self.generic_visit(node.args) - node.decorator_list = self.visit_block(node.decorator_list) - node.body = self._process_block( - anno.getanno(node, anno.Static.SCOPE), node.body) - return node - - def visit_For(self, node): - node.target = self.visit(node.target) - node.body = self._process_block( - anno.getanno(node, NodeAnno.BODY_SCOPE), node.body) - node.orelse = self._process_block( - anno.getanno(node, NodeAnno.ORELSE_SCOPE), node.orelse) - return node - - def visit_While(self, node): - node.test = self.visit(node.test) - node.body = self._process_block( - anno.getanno(node, NodeAnno.BODY_SCOPE), node.body) - node.orelse = self._process_block( - anno.getanno(node, NodeAnno.ORELSE_SCOPE), node.orelse) - return node - - def visit_If(self, node): - node.test = self.visit(node.test) - node.body = self._process_block( - anno.getanno(node, NodeAnno.BODY_SCOPE), node.body) - node.orelse = self._process_block( - anno.getanno(node, NodeAnno.ORELSE_SCOPE), node.orelse) - return node - - def visit_With(self, node): - node.items = self.visit_block(node.items) - node.body = self._process_block( - anno.getanno(node, NodeAnno.BODY_SCOPE), node.body) - return node + true_expr=node.body, + false_expr=node.orelse) def transform(node, ctx): diff --git 
a/tensorflow/python/autograph/operators/__init__.py b/tensorflow/python/autograph/operators/__init__.py index 53f4b0ddc8..8ba2558ac2 100644 --- a/tensorflow/python/autograph/operators/__init__.py +++ b/tensorflow/python/autograph/operators/__init__.py @@ -38,6 +38,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.autograph.operators.control_flow import for_stmt +from tensorflow.python.autograph.operators.control_flow import if_stmt from tensorflow.python.autograph.operators.control_flow import while_stmt from tensorflow.python.autograph.operators.data_structures import list_append from tensorflow.python.autograph.operators.data_structures import list_pop -- GitLab From 131f6f8429ffa0511a3d5a6a595843d3d96ec942 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 10 Oct 2018 08:28:08 -0700 Subject: [PATCH 196/411] cond_v2: raise an error if pred is a Python bool. This is to match the existing behavior of tf.cond. PiperOrigin-RevId: 216534084 --- .../python/kernel_tests/cond_v2_test.py | 34 +++++++++++-------- .../kernel_tests/control_flow_ops_py_test.py | 1 - tensorflow/python/ops/cond_v2_impl.py | 3 ++ 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py index a424a0f219..0e7c2f8ae6 100644 --- a/tensorflow/python/kernel_tests/cond_v2_test.py +++ b/tensorflow/python/kernel_tests/cond_v2_test.py @@ -670,7 +670,7 @@ class CondV2CollectionTest(test.TestCase): y_const = constant_op.constant(ops.get_collection("y")[0]) return math_ops.add(x_const, y_const) - cnd = cond_v2.cond_v2(True, fn, fn) + cnd = cond_v2.cond_v2(constant_op.constant(True), fn, fn) self.assertEquals(cnd.eval(), 7) def testCollectionTensorValueAccessInCond(self): @@ -705,9 +705,7 @@ class CondV2CollectionTest(test.TestCase): z = math_ops.add(x, y) return math_ops.mul(x, z) - cnd = cond_v2.cond_v2( - True, true_fn, - false_fn) + cnd = 
cond_v2.cond_v2(constant_op.constant(True), true_fn, false_fn) self.assertEquals(cnd.eval(), 14) read_z_collection = ops.get_collection("z") @@ -780,10 +778,12 @@ class CondV2ContainerTest(test.TestCase): return constant_op.constant(6.0) with ops.container("l1"): - cnd_true = cond_v2.cond_v2(True, true_fn, false_fn) + cnd_true = cond_v2.cond_v2( + constant_op.constant(True), true_fn, false_fn) self.assertEquals(cnd_true.eval(), 2) - cnd_false = cond_v2.cond_v2(False, true_fn, false_fn) + cnd_false = cond_v2.cond_v2( + constant_op.constant(False), true_fn, false_fn) self.assertEquals(cnd_false.eval(), 6) v4 = variables.Variable([3]) @@ -812,7 +812,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): return c with ops.colocate_with(a.op): - self.assertEquals(cond_v2.cond_v2(True, fn, fn).eval(), 3) + self.assertEquals( + cond_v2.cond_v2(constant_op.constant(True), fn, fn).eval(), 3) def fn2(): c = constant_op.constant(3.0) @@ -821,7 +822,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): with ops.colocate_with(a.op): with ops.colocate_with(b.op): - self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3) + self.assertEquals( + cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3) def testColocateWithInAndOutOfCond(self): with ops.Graph().as_default() as g: @@ -837,7 +839,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): return c with ops.colocate_with(a.op): - self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3) + self.assertEquals( + cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3) d = constant_op.constant([2.0], name="d") self.assertEqual([b"loc:@a"], d.op.colocation_groups()) @@ -858,7 +861,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): with ops.colocate_with(b.op): c = math_ops.add(a, a, name="c") return c - out_cond_2 = cond_v2.cond_v2(True, fn, fn) + out_cond_2 = cond_v2.cond_v2(constant_op.constant(True), fn, fn) run_options = 
config_pb2.RunOptions(output_partition_graphs=True) run_metadata = config_pb2.RunMetadata() @@ -880,7 +883,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): return c with ops.device("/device:CPU:0"): - self.assertEquals(cond_v2.cond_v2(True, fn, fn).eval(), 3) + self.assertEquals( + cond_v2.cond_v2(constant_op.constant(True), fn, fn).eval(), 3) def fn2(): c = constant_op.constant(3.0) @@ -888,7 +892,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): return c with ops.device("/device:GPU:0"): - self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3) + self.assertEquals( + cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3) def testDeviceInAndOutOfCond(self): with ops.Graph().as_default() as g: @@ -902,7 +907,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): return c with ops.device("/device:CPU:0"): - self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3) + self.assertEquals( + cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3) d = constant_op.constant(4.0) self.assertEqual("/device:CPU:0", d.op.device) @@ -921,7 +927,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): with ops.device("/device:CPU:0"): a = constant_op.constant([2.0], name="a") - out_cond_2 = cond_v2.cond_v2(True, fn, fn) + out_cond_2 = cond_v2.cond_v2(constant_op.constant(True), fn, fn) run_options = config_pb2.RunOptions(output_partition_graphs=True) run_metadata = config_pb2.RunMetadata() diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index a5f85b97f7..46b8b10e90 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -333,7 +333,6 @@ class ControlFlowTest(test.TestCase): with self.assertRaisesOpError("has inputs from different frames"): res.eval(feed_dict={data: 1.0}) - @test_util.disable_control_flow_v2("b/113294340") def testCondBool(self): 
values = constant_op.constant(10) fn1 = lambda: math_ops.add(values, 1) diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py index c9aa4d4889..81d9cba042 100644 --- a/tensorflow/python/ops/cond_v2_impl.py +++ b/tensorflow/python/ops/cond_v2_impl.py @@ -52,6 +52,9 @@ _gradients_impl = None def cond_v2(pred, true_fn, false_fn, name="cond"): """Like tf.cond, except emits a single If op.""" + if isinstance(pred, bool): + raise TypeError("pred must not be a Python bool", pred) + if not name: name = "cond" -- GitLab From 79af30d357fbe0869e163e1d9dce0cb869b3724f Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 10 Oct 2018 08:36:36 -0700 Subject: [PATCH 197/411] [Grappler] Add RemoveStackStridedSliceSameAxis optimizer. // Replace operations of the form: // x = stack((a_0, a_1, ..., a_{n-1}), axis=k)[:,...,i,...] // with // a_i // when the strided slice index `i` is applied in the k'th axis. // // Similarly, replace operations of the form: // x = stack((a_0, a_1, ..., a_{n-1}), axis=k)[:,...,i:i+1,...] // with // expand_dims(a_i, axis=k) // PiperOrigin-RevId: 216535346 --- .../optimizers/arithmetic_optimizer.cc | 295 +++++++++++++++++- .../optimizers/arithmetic_optimizer.h | 3 + .../optimizers/arithmetic_optimizer_test.cc | 211 +++++++++++++ .../optimizers/graph_optimizer_stage.h | 4 + .../optimizers/graph_optimizer_stage_test.cc | 3 + 5 files changed, 515 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 7d5014ee0a..0c2686a419 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -46,6 +46,7 @@ limitations under the License. 
#include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/saved_tensor_slice_util.h" +#include "tensorflow/core/util/strided_slice_op.h" using tensorflow::strings::StrCat; @@ -157,6 +158,14 @@ void SetSourceDataType(DataType dtype, NodeDef* node) { SetDataTypeToAttr(dtype, SourceDataTypeAttrName(*node), node); } +Status CheckAttrExists(const NodeDef& node, const string& key) { + if (node.attr().count(key) == 0) { + return errors::InvalidArgument("Node '", node.name(), "'lacks '", key, + "' attr: ", node.DebugString()); + } + return Status::OK(); +} + NodeDef* GetTailOfValuePreservingChain( const NodeDef& node, const NodeMap& node_map, const std::unordered_set& nodes_to_preserve) { @@ -2902,6 +2911,284 @@ class UnaryOpsComposition : public ArithmeticOptimizerStage { std::unordered_set fused_nodes_; }; +// Replace operations of the form: +// x = stack((a_0, a_1, ..., a_{n-1}), axis=k)[:,...,i,...] +// with +// a_i +// when the strided slice index `i` is applied in the k'th axis. +// +// Similarly, replace operations of the form: +// x = stack((a_0, a_1, ..., a_{n-1}), axis=k)[:,...,i:i+1,...] +// with +// expand_dims(a_i, axis=k) +// +// TODO(ebrevdo): Extend to also replace operations of the form +// concat((a_0, a_1, ..., ), axis=k)[:, ..., s_i:s_{i+1}, ...] +// with +// a_i, +// when +// s_i = cumsum(shape(a)[k] for a in (a_0, ...,))[i] +// and slicing is in the k'th axis. 
+class RemoveStackStridedSliceSameAxis : public ArithmeticOptimizerStage { + public: + explicit RemoveStackStridedSliceSameAxis( + const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("RemoveStackStridedSliceSameAxis", ctx, + ctx_ext) {} + ~RemoveStackStridedSliceSameAxis() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsStridedSlice(*node); + } + + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { + // *node is a StridedSlice NodeDef. + NodeDef* pack; + + // Get the input and see if it's a Pack op. + TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &pack)); + if (!IsPack(*pack)) return Status::OK(); + + bool return_early; + PartialTensorShape pack_output_shape; + int pack_axis; + TF_RETURN_IF_ERROR( + CheckInputs(node, pack, &pack_output_shape, &pack_axis, &return_early)); + if (return_early) return Status::OK(); + + int slice_start_value; + bool found; + TF_RETURN_IF_ERROR(GetSliceAxis(node, pack, pack_output_shape, pack_axis, + &slice_start_value, &found)); + if (!found) return Status::OK(); + + return RewriteGraph(node, pack, slice_start_value, pack_axis, + simplified_node_name); + } + + protected: + bool IsReallyConstant(const NodeDef& node) const { + if (!IsConstant(node)) { + return false; + } + // If the node is fed it's not constant anymore. 
+ return ctx().feed_nodes->find(node.name()) == ctx().feed_nodes->end(); + } + + bool GetConstantAsInt64(const NodeDef& node, DataType dtype, + std::vector* values) { + if (dtype == DT_INT32) { + std::vector values_int32; + if (!ValuesFromConstNode(node, &values_int32)) { + return false; + } + std::copy(values_int32.begin(), values_int32.end(), + std::inserter(*values, values->begin())); + return true; + } else { + return ValuesFromConstNode(node, values); + } + } + + Status CheckInputs(const NodeDef* node, const NodeDef* pack, + PartialTensorShape* pack_output_shape, int* pack_axis, + bool* return_early) { + *return_early = true; + TF_RETURN_IF_ERROR(CheckAttrExists(*pack, "axis")); + + *pack_axis = pack->attr().at("axis").i(); + auto slice_properties = + ctx().graph_properties->GetInputProperties(node->name()); + *pack_output_shape = slice_properties[0].shape(); + if (pack_output_shape->unknown_rank()) { + return Status::OK(); + } + const int pack_input_rank = pack_output_shape->dims() - 1; + if (*pack_axis < 0) { + // The ndims of any input into Pack op is its output ndims - 1. 
+ *pack_axis += pack_input_rank; + } + if (*pack_axis < 0 || *pack_axis >= pack_input_rank) { + return errors::InvalidArgument( + "Pack node (", pack->name(), + ") axis attribute is out of bounds: ", pack->attr().at("axis").i()); + } + *return_early = false; + return Status::OK(); + } + + Status GetSliceAxis(const NodeDef* node, const NodeDef* pack, + const PartialTensorShape& pack_output_shape, + int pack_axis, int* slice_start_value, bool* found) { + *found = false; + for (auto key : {"begin_mask", "end_mask", "ellipsis_mask", "new_axis_mask", + "shrink_axis_mask"}) { + TF_RETURN_IF_ERROR(CheckAttrExists(*node, key)); + } + + const int begin_mask = node->attr().at("begin_mask").i(); + const int end_mask = node->attr().at("end_mask").i(); + const int ellipsis_mask = node->attr().at("ellipsis_mask").i(); + const int new_axis_mask = node->attr().at("new_axis_mask").i(); + const int shrink_axis_mask = node->attr().at("shrink_axis_mask").i(); + + // Check that the StridedSlice is one of these at pack_axis: + // [..., i, ...] + // [..., i:i+1, ...] + // [..., :1, ...] + // [..., -1:, ...] + /// [..., s_{pack_axis}-1:, ...] 
+ NodeDef* slice_begin; + NodeDef* slice_end; + NodeDef* slice_strides; + TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &slice_begin)); + TF_RETURN_IF_ERROR(GetInputNode(node->input(2), &slice_end)); + TF_RETURN_IF_ERROR(GetInputNode(node->input(3), &slice_strides)); + + for (const auto* n : {slice_begin, slice_end, slice_strides}) { + if (!IsReallyConstant(*n)) return Status::OK(); + } + + Tensor slice_begin_t; + Tensor slice_end_t; + Tensor slice_strides_t; + + TF_RETURN_IF_ERROR(CheckAttrExists(*slice_begin, "value")); + TF_RETURN_IF_ERROR(CheckAttrExists(*slice_end, "value")); + + if (!slice_begin_t.FromProto(slice_begin->attr().at("value").tensor())) { + return Status::OK(); + } + if (!slice_end_t.FromProto(slice_end->attr().at("value").tensor())) { + return Status::OK(); + } + if (!slice_strides_t.FromProto( + slice_strides->attr().at("value").tensor())) { + return Status::OK(); + } + TensorShape processing_shape; + TensorShape final_shape; + bool is_identity; + bool is_simple_slice; + bool slice_dim0; + gtl::InlinedVector slice_begin_vec; + gtl::InlinedVector slice_end_vec; + gtl::InlinedVector slice_strides_vec; + TF_RETURN_IF_ERROR(ValidateStridedSliceOp( + &slice_begin_t, &slice_end_t, slice_strides_t, pack_output_shape, + begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, + &processing_shape, &final_shape, &is_identity, &is_simple_slice, + &slice_dim0, &slice_begin_vec, &slice_end_vec, &slice_strides_vec)); + + if (!is_simple_slice) return Status::OK(); + + int begin_index = -1; + int64 begin_value = 0; + for (int i = 0; i < slice_begin_vec.size(); ++i) { + const int64 v = slice_begin_vec[i]; + if (v != 0) { + if (begin_index != -1) { + // At least two start values that are nonzero. 
+ return Status::OK(); + } + begin_index = i; + begin_value = v; + } + } + + int end_index = -1; + int64 end_value = 0; + for (int i = 0; i < slice_end_vec.size(); ++i) { + const int64 v = slice_end_vec[i]; + if (v != pack_output_shape.dim_size(i)) { + if (end_index != -1) { + // At least two end values that differ from the dimension size. + return Status::OK(); + } + end_index = i; + end_value = v; + } + } + + if (begin_index == -1 && end_index == -1) return Status::OK(); + if (begin_index != -1 && end_index != -1 && begin_index != end_index) { + // Somehow received different axes for begin/end slicing + return Status::OK(); + } + const int slice_axis = (begin_index == -1) ? end_index : begin_index; + if (slice_axis != pack_axis) { + // Not slicing on the same axis as the Pack op. + return Status::OK(); + } + *slice_start_value = (begin_index == -1) ? 0 : begin_value; + const int64 slice_end_value = + (end_index == -1) ? pack_output_shape.dim_size(slice_axis) : end_value; + if (slice_end_value != *slice_start_value + 1) { + // Not slicing a single value out. + return Status::OK(); + } + + if (*slice_start_value < 0 || *slice_start_value >= pack->input_size()) { + return errors::InvalidArgument( + "Node ", node->name(), " requested invalid slice index ", + *slice_start_value, " on axis ", slice_axis, + " from tensor of shape: ", pack_output_shape.DebugString()); + } + + *found = true; // slice_start_value is valid. 
+ return Status::OK(); + } + + Status RewriteGraph(const NodeDef* node, const NodeDef* pack, + int slice_start_value, int pack_axis, + string* simplified_node_name) { + OpInfo::TensorProperties input_slice_properties; + NodeDef* input_slice; + TF_RETURN_IF_ERROR( + GetInputNode(pack->input(slice_start_value), &input_slice)); + TF_RETURN_IF_ERROR(GetTensorProperties(pack->input(slice_start_value), + &input_slice_properties)); + PartialTensorShape input_slice_shape(input_slice_properties.shape()); + + OpInfo::TensorProperties output_properties; + TF_RETURN_IF_ERROR(GetTensorProperties( + strings::StrCat(node->name(), ":", 0), &output_properties)); + PartialTensorShape output_shape(output_properties.shape()); + NodeDef* output = + AddEmptyNode(OptimizedNodeName(ParseNodeScopeAndName(node->name()))); + if (input_slice_shape.IsCompatibleWith(output_shape)) { + output->set_op("Identity"); + output->set_device(node->device()); + SetDataTypeToAttr(output_properties.dtype(), "T", output); + output->add_input(input_slice->name()); + } else { + NodeDef* axis = AddEmptyNode( + OptimizedNodeName(ParseNodeScopeAndName(node->name()), "Axis")); + axis->set_op("Const"); + axis->set_device(node->device()); + auto axis_attr = axis->mutable_attr(); + SetDataTypeToAttr(DT_INT32, "dtype", axis); + auto* axis_t = (*axis_attr)["value"].mutable_tensor(); + axis_t->set_dtype(DT_INT32); + axis_t->add_int_val(pack_axis); + AddToOptimizationQueue(axis); + output->set_op("ExpandDims"); + output->set_device(node->device()); + SetDataTypeToAttr(output_properties.dtype(), "T", output); + output->add_input(input_slice->name()); + output->add_input(axis->name()); + } + + // Copy dependencies over. 
+ ForwardControlDependencies(output, {node, pack}); + AddToOptimizationQueue(output); + *simplified_node_name = output->name(); + + return Status::OK(); + } +}; + } // namespace class UniqueNodes { @@ -3132,7 +3419,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { const GraphOptimizerContext ctx(&nodes_to_preserve_, optimized_graph_, graph_properties_.get(), node_map_.get(), - opt_level_); + &feed_nodes_, opt_level_); const ArithmeticOptimizerContext ctx_ext(&nodes_to_simplify); // Stop pipeline after first stage returning non-empty simplified tensor name. @@ -3186,6 +3473,8 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { pipeline.AddStage(ctx, ctx_ext); if (options_.unary_ops_composition) pipeline.AddStage(ctx, ctx_ext); + if (options_.remove_stack_strided_slice_same_axis) + pipeline.AddStage(ctx, ctx_ext); VLOG(1) << "Run " << pipeline.NumStages() << " arithmetic optimizer stages: " << str_util::Join(pipeline.StageNames(), ", "); @@ -3249,6 +3538,10 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, optimized_graph_ = &optimized_item.graph; node_map_.reset(new NodeMap(optimized_graph_)); + for (const auto& feed : item.feed) { + feed_nodes_.insert(NodeName(feed.first)); + } + // Disable restricted graph rewrites. options_.unary_ops_composition &= item.allowed_optimizations.non_differentiable_rewrites; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index d457eb6d21..bb56f61e30 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -79,6 +80,7 @@ class ArithmeticOptimizer : public GraphOptimizer { bool convert_log1p = true; bool convert_expm1 = true; bool unary_ops_composition = true; + bool remove_stack_strided_slice_same_axis = false; // Choose which arithmetic optimizer stages will be enabled for a given // optimization level by default. @@ -128,6 +130,7 @@ class ArithmeticOptimizer : public GraphOptimizer { std::unique_ptr node_map_; std::unique_ptr graph_properties_; GraphDef* optimized_graph_ = nullptr; // Not owned. + gtl::FlatSet feed_nodes_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 77f3c64c65..d091b26b65 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -288,6 +288,12 @@ class ArithmeticOptimizerTest : public GrapplerTest { DisableAllStages(optimizer); optimizer->options_.unary_ops_composition = true; } + + void EnableOnlyRemoveStackStridedSliceSameAxis( + ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_stack_strided_slice_same_axis = true; + } }; TEST_F(ArithmeticOptimizerTest, NoOp) { @@ -3364,5 +3370,210 @@ TEST_F(ArithmeticOptimizerTest, UnaryOpsComposition) { test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } +TEST_F(ArithmeticOptimizerTest, RemoveStackStridedSliceSameAxis) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto a_in = + ops::Const(s.WithOpName("a_in"), {1.0f, 2.0f, 3.0f, 4.0f}, {2, 2}); + auto b_in = + ops::Const(s.WithOpName("b_in"), {-1.0f, -2.0f, -3.0f, 
-4.0f}, {2, 2}); + auto c_in = + ops::Const(s.WithOpName("c_in"), {5.0f, 6.0f, 7.0f, 8.0f}, {2, 2}); + auto a = ops::PlaceholderWithDefault(s.WithOpName("a"), a_in, + PartialTensorShape({-1, -1})); + auto b = ops::PlaceholderWithDefault(s.WithOpName("b"), b_in, + PartialTensorShape({-1, -1})); + auto c = ops::PlaceholderWithDefault(s.WithOpName("c"), c_in, + PartialTensorShape({-1, -1})); + // stacked = tf.stack((a, b, c), axis=1). + // stacked.shape == [2, 3, 2] (a, b, c are stacked along new axis 1) + auto stacked = + ops::Stack(s.WithOpName("stacked"), {a.output, b.output, c.output}, + ops::Stack::Axis(1)); + auto expanded_a = ops::ExpandDims(s.WithOpName("expanded_a"), a, {1}); + auto expanded_b = ops::ExpandDims(s.WithOpName("expanded_b"), b, {1}); + auto expanded_c = ops::ExpandDims(s.WithOpName("expanded_c"), c, {1}); + auto begin_a = ops::Const(s.WithOpName("begin_a"), {0, 0, 0}, {3}); + auto end_a = ops::Const(s.WithOpName("end_a"), {0, 1, 0}, {3}); + auto begin_b = ops::Const(s.WithOpName("begin_b"), {0, 1, 0}, {3}); + auto end_b = ops::Const(s.WithOpName("end_b"), {0, 2, 0}, {3}); + auto begin_c = ops::Const(s.WithOpName("begin_c"), {0, 2, 0}, {3}); + auto end_c = ops::Const(s.WithOpName("end_c"), {0, 3, 0}, {3}); + auto end_c_1to = ops::Const(s.WithOpName("begin_c_2to"), {0, 0, 0}, {3}); + auto strides = ops::Const(s.WithOpName("strides"), {1, 1, 1}, {3}); + + // stacked[:, 0] + using SS = ops::StridedSlice; + auto pa_slice = ops::Identity( + s.WithOpName("pa_slice_out"), + SS(s.WithOpName("pa_slice"), stacked, begin_a, end_a, strides, + SS::BeginMask(0b0101) // 5 + .EllipsisMask(0) + .EndMask(0b0101) // 5 + .NewAxisMask(0) + .ShrinkAxisMask(0b0010))); // 2 + + // stacked[:, 1] + auto pb_slice = ops::Identity( + s.WithOpName("pb_slice_out"), + SS(s.WithOpName("pb_slice"), stacked, begin_b, end_b, strides, + SS::BeginMask(0b0101) // 5 + .EllipsisMask(0) + .EndMask(0b0101) // 5 + .NewAxisMask(0) + .ShrinkAxisMask(0b0010))); // 2 + + // stacked[:, 2] + 
auto pc_slice = ops::Identity( + s.WithOpName("pc_slice_out"), + SS(s.WithOpName("pc_slice"), stacked, begin_c, end_c, strides, + SS::BeginMask(0b0101) // 5 + .EllipsisMask(0) + .EndMask(0b0101) // 5 + .NewAxisMask(0) + .ShrinkAxisMask(0b0010))); // 2 + + // stacked[:, 0:1, :] + auto pa_slice_01 = ops::Identity( + s.WithOpName("pa_slice_01_out"), + SS(s.WithOpName("pa_slice_01"), stacked, begin_a, end_a, strides, + SS::BeginMask(0b0101) // 5 + .EllipsisMask(0) + .EndMask(0b0101) // 5 + .NewAxisMask(0) + .ShrinkAxisMask(0))); + + // stacked[:, :1, :] + auto pa_slice_to1 = ops::Identity( + s.WithOpName("pa_slice_to1_out"), + SS(s.WithOpName("pa_slice_to1"), stacked, begin_a, end_a, strides, + SS::BeginMask(0b0111) // 7 + .EllipsisMask(0) + .EndMask(0b0101) // 5 + .NewAxisMask(0) + .ShrinkAxisMask(0))); + + // stacked[:, 1:2, :] + auto pb_slice_12 = ops::Identity( + s.WithOpName("pb_slice_12_out"), + SS(s.WithOpName("pb_slice_12"), stacked, begin_b, end_b, strides, + SS::BeginMask(0b0101) // 5 + .EllipsisMask(0) + .EndMask(0b0101) // 5 + .NewAxisMask(0) + .ShrinkAxisMask(0))); + + // stacked[:, 2:, :]. + auto pc_slice_2to = ops::Identity( + s.WithOpName("pc_slice_2to_out"), + SS(s.WithOpName("pc_slice_2to"), stacked, begin_c, end_c_1to, strides, + SS::BeginMask(0b0101) // 5 + .EllipsisMask(0) + .EndMask(0b0111) // 7 + .NewAxisMask(0) + .ShrinkAxisMask(0))); + + GrapplerItem item; + item.fetch = {"a", + "b", + "c", + "pa_slice_out", + "pb_slice_out", + "pc_slice_out", + "expanded_a", + "expanded_b", + "expanded_c", + "pa_slice_01_out", + "pa_slice_to1_out", + "pb_slice_12_out", + "pc_slice_2to_out"}; + enum FetchItem { + fA, + fB, + fC, + fASliceOut, + fBSliceOut, + fCSliceOut, + fExpandedA, + fExpandedB, + fExpandedC, + fASlice01Out, + fASliceTo1Out, + fBSlice12Out, + fCSlice2ToOut, + }; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + + // stacked[:, 0, :] == a. 
+ test::ExpectTensorEqual(tensors_expected[fA], + tensors_expected[fASliceOut]); + // stacked[:, 1, :] == b. + test::ExpectTensorEqual(tensors_expected[fB], + tensors_expected[fBSliceOut]); + // stacked[:, 2, :] == c. + test::ExpectTensorEqual(tensors_expected[fC], + tensors_expected[fCSliceOut]); + + // stacked[:, 0:1, :] == expand_dims(a, 1). + test::ExpectTensorEqual(tensors_expected[fExpandedA], + tensors_expected[fASlice01Out]); + + // stacked[:, :1, :] == expand_dims(a, 1). + test::ExpectTensorEqual(tensors_expected[fExpandedA], + tensors_expected[fASliceTo1Out]); + + // stacked[:, 1:2, :] == expand_dims(b, 1). + test::ExpectTensorEqual(tensors_expected[fExpandedB], + tensors_expected[fBSlice12Out]); + // stacked[:, 2:, :] == expand_dims(c, 1). + test::ExpectTensorEqual(tensors_expected[fExpandedC], + tensors_expected[fCSlice2ToOut]); + + GraphDef output; + ArithmeticOptimizer optimizer; + EnableOnlyRemoveStackStridedSliceSameAxis(&optimizer); + OptimizeAndPrune(&optimizer, &item, &output); + + for (const auto& node : output.node()) { + if (node.name() == "pa_slice_out") { + EXPECT_EQ(node.input(0), "a"); + } else if (node.name() == "pb_slice_out") { + EXPECT_EQ(node.input(0), "b"); + } else if (node.name() == "pc_slice_out") { + EXPECT_EQ(node.input(0), "c"); + } else if (str_util::EndsWith(node.name(), "_out")) { + EXPECT_EQ(strings::StrCat(node.input(0), "_out"), + strings::StrCat( + "ArithmeticOptimizer/RemoveStackStridedSliceSameAxis_", + node.name())); + } + } + + auto tensors = EvaluateNodes(output, item.fetch); + + // stacked[:, 0, :] == a. + test::ExpectTensorEqual(tensors_expected[fA], tensors[fASliceOut]); + + // stacked[:, 1, :] == b. + test::ExpectTensorEqual(tensors_expected[fB], tensors[fBSliceOut]); + // stacked[:, 2, :] == c. + test::ExpectTensorEqual(tensors_expected[fC], tensors[fCSliceOut]); + + // stacked[:, 0:1, :] == expand_dims(a, 1). 
+ test::ExpectTensorEqual(tensors_expected[fExpandedA], + tensors[fASlice01Out]); + + // stacked[:, :1, :] == expand_dims(a, 1). + test::ExpectTensorEqual(tensors_expected[fExpandedA], + tensors[fASliceTo1Out]); + + // stacked[:, 1:2, :] == expand_dims(b, 1). + test::ExpectTensorEqual(tensors_expected[fExpandedB], + tensors[fBSlice12Out]); + // stacked[:, 2:, :] == expand_dims(c, 1). + test::ExpectTensorEqual(tensors_expected[fExpandedC], + tensors[fCSlice2ToOut]); +} + } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h index 2afb5df431..f31a30ec0e 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -46,17 +47,20 @@ struct GraphOptimizerContext { GraphOptimizerContext(const std::unordered_set* nodes_to_preserve, GraphDef* optimized_graph, GraphProperties* graph_properties, NodeMap* node_map, + gtl::FlatSet* feed_nodes, RewriterConfig::Toggle opt_level) : nodes_to_preserve(nodes_to_preserve), optimized_graph(optimized_graph), graph_properties(graph_properties), node_map(node_map), + feed_nodes(feed_nodes), opt_level(opt_level) {} const std::unordered_set* nodes_to_preserve; GraphDef* optimized_graph; GraphProperties* graph_properties; NodeMap* node_map; + gtl::FlatSet* feed_nodes; RewriterConfig::Toggle opt_level; }; diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc b/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc index 34f28c7c27..799c40c67b 100644 --- 
a/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc @@ -61,6 +61,7 @@ TEST_F(GraphOptimizerStageTest, OptimizedNodeName) { /*optimized_graph*/ nullptr, /*graph_properties*/ nullptr, /*node_name*/ nullptr, + /*feed_nodes*/ nullptr, /*opt_level*/ RewriterConfig::ON); FakeOptimizerStage stage("my_opt", "my_stg", ctx); @@ -97,6 +98,7 @@ TEST_F(GraphOptimizerStageTest, GetInputNodeAndProperties) { /*optimized_graph*/ &item.graph, /*graph_properties*/ &properties, /*node_name*/ &node_map, + /*feed_nodes*/ nullptr, /*opt_level*/ RewriterConfig::ON); FakeOptimizerStage stage("my_opt", "my_stg", ctx); @@ -137,6 +139,7 @@ TEST_F(GraphOptimizerStageTest, AddNodes) { /*optimized_graph*/ &item.graph, /*graph_properties*/ &properties, /*node_name*/ &node_map, + /*feed_nodes*/ nullptr, /*opt_level*/ RewriterConfig::ON); FakeOptimizerStage stage("my_opt", "my_stg", ctx); -- GitLab From 828e374bfbe788a1c5ddbdbbd36149785ad6d0e6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 08:43:35 -0700 Subject: [PATCH 198/411] remove debug statements PiperOrigin-RevId: 216536298 --- tensorflow/contrib/lite/toco/import_tensorflow.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index ff67b306e0..41d02df5f0 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1130,7 +1130,6 @@ tensorflow::Status ConvertUnsupportedOperator( for (int i = 0; i < op_def->output_arg_size(); ++i) { string multiples = op_def->output_arg(i).number_attr(); int num_outputs = multiples.empty() ? 1 : GetIntAttr(node, multiples); - LOG(INFO) << "dddddddd " << num_outputs; for (int j = 0; j < num_outputs; ++j) { if (next_output == 0) { op->outputs.push_back(node.name()); // Implicit :0. 
@@ -1141,7 +1140,6 @@ tensorflow::Status ConvertUnsupportedOperator( } } } else { - LOG(INFO) << "nodef!!!!!!!!!!! "; op->outputs.push_back(node.name()); // Implicit :0. } -- GitLab From ac53355550c994cf22485d977125b23153162593 Mon Sep 17 00:00:00 2001 From: Jason Furmanek Date: Wed, 10 Oct 2018 16:34:16 +0000 Subject: [PATCH 199/411] Make nccl2 bazel configuration platform independent --- third_party/nccl/build_defs.bzl.tpl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl index ede1d3dad5..886f6ddcd4 100644 --- a/third_party/nccl/build_defs.bzl.tpl +++ b/third_party/nccl/build_defs.bzl.tpl @@ -152,6 +152,14 @@ def device_link(name, srcs): suffix = ".pic.a", ) + cpu_arch = "X86_64" + native.genrule( + name = "get_cpu_gen", + outs = [cpu_arch], + cmd = "uname -m", + ) + cpu_arch = cpu_arch.upper() + # Device-link to cubins for each architecture. images = [] cubins = [] @@ -159,7 +167,7 @@ def device_link(name, srcs): cubin = "%s_%s.cubin" % (name, arch) register_hdr = "%s_%s.h" % (name, arch) nvlink = "@local_config_nccl//:nvlink" - cmd = ("$(location %s) --cpu-arch=X86_64 " % nvlink + + cmd = ("$(location %s) --cpu-arch=%s " % (nvlink, cpu_arch) + "--arch=%s $(SRCS) " % arch + "--register-link-binaries=$(location %s) " % register_hdr + "--output-file=$(location %s)" % cubin) @@ -207,4 +215,5 @@ def device_link(name, srcs): "@local_config_cuda//cuda:cuda_headers", "@local_config_cuda//cuda:cudart_static", ], + defines = ["__NV_EXTRA_INITIALIZATION=", "__NV_EXTRA_FINALIZATION="] ) -- GitLab From f02251190f5908d2078e9fc11b92375dfc3a3054 Mon Sep 17 00:00:00 2001 From: Benjamin Barenblat Date: Wed, 10 Oct 2018 09:47:43 -0700 Subject: [PATCH 200/411] Correct a couple of format strings Change a couple of fscanf-style format strings to use the format macro constants defined in cinttypes. This quashes -Wformat. 
PiperOrigin-RevId: 216545604 --- tensorflow/core/lib/strings/numbers.cc | 3 ++- tensorflow/core/util/command_line_flags.cc | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc index 87aa5915ff..fff6f1fedc 100644 --- a/tensorflow/core/lib/strings/numbers.cc +++ b/tensorflow/core/lib/strings/numbers.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -391,7 +392,7 @@ string FpToString(Fprint fp) { bool StringToFp(const string& s, Fprint* fp) { char junk; uint64_t result; - if (sscanf(s.c_str(), "%lx%c", &result, &junk) == 1) { + if (sscanf(s.c_str(), "%" SCNx64 "%c", &result, &junk) == 1) { *fp = result; return true; } else { diff --git a/tensorflow/core/util/command_line_flags.cc b/tensorflow/core/util/command_line_flags.cc index 55f1e30880..f1196fdfec 100644 --- a/tensorflow/core/util/command_line_flags.cc +++ b/tensorflow/core/util/command_line_flags.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include @@ -70,7 +71,7 @@ bool ParseInt64Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag, str_util::ConsumePrefix(&arg, "=")) { char extra; int64_t parsed_int64; - if (sscanf(arg.data(), "%ld%c", &parsed_int64, &extra) != 1) { + if (sscanf(arg.data(), "%" SCNd64 "%c", &parsed_int64, &extra) != 1) { LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag << "."; *value_parsing_ok = false; -- GitLab From 24593b1c6bd7b05115cbc4e61b08cf6953541a5c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 09:54:44 -0700 Subject: [PATCH 201/411] Adds `get_config` and `from_config` to Optimizers V2. 
PiperOrigin-RevId: 216546565 --- .../optimizer_v2/checkpointable_utils_test.py | 10 +++--- .../python/keras/optimizer_v2/adadelta.py | 10 ++++++ .../keras/optimizer_v2/adadelta_test.py | 17 +++++++++ .../python/keras/optimizer_v2/adagrad.py | 8 +++++ .../python/keras/optimizer_v2/adagrad_test.py | 13 +++++++ tensorflow/python/keras/optimizer_v2/adam.py | 10 ++++++ .../python/keras/optimizer_v2/adam_test.py | 11 ++++++ .../optimizer_v2/checkpointable_utils_test.py | 10 +++--- .../python/keras/optimizer_v2/optimizer_v2.py | 36 +++++++++++++++++++ .../python/keras/optimizer_v2/rmsprop.py | 11 ++++++ .../python/keras/optimizer_v2/rmsprop_test.py | 22 ++++++++++++ tensorflow/python/keras/optimizer_v2/sgd.py | 14 ++++++++ .../python/keras/optimizer_v2/sgd_test.py | 14 ++++++++ 13 files changed, 178 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py index 3c68ef995a..3e225ff0dd 100644 --- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py +++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py @@ -143,10 +143,12 @@ class CheckpointingTests(test.TestCase): suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ name + suffix for name in expected_checkpoint_names] - # The Dense layers also save get_config() JSON - expected_checkpoint_names.extend( - ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON", - "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"]) + # The optimizer and Dense layers also save get_config() JSON + expected_checkpoint_names.extend([ + "optimizer/.ATTRIBUTES/OBJECT_CONFIG_JSON", + "model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON", + "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON" + ]) named_variables = {v.name: v for v in named_variables} six.assertCountEqual(self, expected_checkpoint_names, named_variables.keys()) diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py 
b/tensorflow/python/keras/optimizer_v2/adadelta.py index d3b3c9c12e..2ae930fe35 100644 --- a/tensorflow/python/keras/optimizer_v2/adadelta.py +++ b/tensorflow/python/keras/optimizer_v2/adadelta.py @@ -37,6 +37,7 @@ class Adadelta(optimizer_v2.OptimizerV2): Tensor or a Python value. Arguments: + learning_rate: float hyperparameter >= 0. Learning rate. It is recommended to leave it at the default value. rho: float hyperparameter >= 0. The decay rate. @@ -114,3 +115,12 @@ class Adadelta(optimizer_v2.OptimizerV2): grad, indices, use_locking=self._use_locking) + + def get_config(self): + config = super(Adadelta, self).get_config() + config.update({ + "learning_rate": self._serialize_hyperparameter("learning_rate"), + "rho": self._serialize_hyperparameter("rho"), + "epsilon": self._serialize_hyperparameter("epsilon") + }) + return config diff --git a/tensorflow/python/keras/optimizer_v2/adadelta_test.py b/tensorflow/python/keras/optimizer_v2/adadelta_test.py index 6e48f92e4f..6c8fa874e1 100644 --- a/tensorflow/python/keras/optimizer_v2/adadelta_test.py +++ b/tensorflow/python/keras/optimizer_v2/adadelta_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.keras.optimizer_v2 import adadelta from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import math_ops @@ -161,6 +162,22 @@ class AdadeltaOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType( [[-111, -138]], var0.eval()) + def testConfig(self): + + def rho(): + return ops.convert_to_tensor(1.0) + + epsilon = ops.convert_to_tensor(1.0) + + opt = adadelta.Adadelta(learning_rate=1.0, rho=rho, epsilon=epsilon) + config = opt.get_config() + opt2 = adadelta.Adadelta.from_config(config) + self.assertEqual(opt._hyper["learning_rate"][1], + opt2._hyper["learning_rate"][1]) + self.assertEqual(opt._hyper["rho"][1].__name__, + 
opt2._hyper["rho"][1].__name__) + self.assertEqual(opt._hyper["epsilon"][1], opt2._hyper["epsilon"][1]) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py index 2d8cec2300..fe3e8799ef 100644 --- a/tensorflow/python/keras/optimizer_v2/adagrad.py +++ b/tensorflow/python/keras/optimizer_v2/adagrad.py @@ -117,3 +117,11 @@ class Adagrad(optimizer_v2.OptimizerV2): grad, indices, use_locking=self._use_locking) + + def get_config(self): + config = super(Adagrad, self).get_config() + config.update({ + "learning_rate": self._serialize_hyperparameter("learning_rate"), + "initial_accumulator_value": self._initial_accumulator_value + }) + return config diff --git a/tensorflow/python/keras/optimizer_v2/adagrad_test.py b/tensorflow/python/keras/optimizer_v2/adagrad_test.py index fc4ef5c399..81a0993897 100644 --- a/tensorflow/python/keras/optimizer_v2/adagrad_test.py +++ b/tensorflow/python/keras/optimizer_v2/adagrad_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import types as python_types + import numpy as np from tensorflow.python.framework import constant_op @@ -271,6 +273,17 @@ class AdagradOptimizerTest(test.TestCase): # Creating optimizer should cause no exception. 
adagrad.Adagrad(3.0, initial_accumulator_value=0.1) + def testConfig(self): + opt = adagrad.Adagrad( + learning_rate=lambda: ops.convert_to_tensor(1.0), + initial_accumulator_value=2.0) + config = opt.get_config() + opt2 = adagrad.Adagrad.from_config(config) + self.assertIsInstance(opt2._hyper["learning_rate"][1], + python_types.LambdaType) + self.assertEqual(opt._initial_accumulator_value, + opt2._initial_accumulator_value) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py index 8367228d7a..9f1b3330e5 100644 --- a/tensorflow/python/keras/optimizer_v2/adam.py +++ b/tensorflow/python/keras/optimizer_v2/adam.py @@ -201,3 +201,13 @@ class Adam(optimizer_v2.OptimizerV2): update_beta_2 = beta_2_power.assign( beta_2_power * state.get_hyper("beta_2"), use_locking=self._use_locking) return control_flow_ops.group(update_beta_1, update_beta_2) + + def get_config(self): + config = super(Adam, self).get_config() + config.update({ + "learning_rate": self._serialize_hyperparameter("learning_rate"), + "beta_1": self._serialize_hyperparameter("beta_1"), + "beta_2": self._serialize_hyperparameter("beta_2"), + "epsilon": self._serialize_hyperparameter("epsilon") + }) + return config diff --git a/tensorflow/python/keras/optimizer_v2/adam_test.py b/tensorflow/python/keras/optimizer_v2/adam_test.py index 77796317a1..ff328cf925 100644 --- a/tensorflow/python/keras/optimizer_v2/adam_test.py +++ b/tensorflow/python/keras/optimizer_v2/adam_test.py @@ -329,5 +329,16 @@ class AdamOptimizerTest(test.TestCase): # for v1 and v2 respectively. 
self.assertEqual(6, len(set(opt.variables()))) + def testConfig(self): + opt = adam.Adam(learning_rate=1.0, beta_1=2.0, beta_2=3.0, epsilon=4.0) + config = opt.get_config() + opt2 = adam.Adam.from_config(config) + self.assertEqual(opt._hyper["learning_rate"][1], + opt2._hyper["learning_rate"][1]) + self.assertEqual(opt._hyper["beta_1"][1], opt2._hyper["beta_1"][1]) + self.assertEqual(opt._hyper["beta_2"][1], opt2._hyper["beta_2"][1]) + self.assertEqual(opt._hyper["epsilon"][1], opt2._hyper["epsilon"][1]) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py b/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py index 338c04148b..9e66eca9b0 100644 --- a/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py +++ b/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py @@ -143,10 +143,12 @@ class CheckpointingTests(test.TestCase): suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ name + suffix for name in expected_checkpoint_names] - # The Dense layers also save get_config() JSON - expected_checkpoint_names.extend( - ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON", - "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"]) + # The optimizer and Dense layers also save get_config() JSON + expected_checkpoint_names.extend([ + "optimizer/.ATTRIBUTES/OBJECT_CONFIG_JSON", + "model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON", + "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON" + ]) named_variables = {v.name: v for v in named_variables} six.assertCountEqual(self, expected_checkpoint_names, named_variables.keys()) diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py index bd5557f4fd..7d05be694e 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py @@ -1319,6 +1319,42 @@ class OptimizerV2(optimizer_v1.Optimizer): 
variable=variable, optional_op_name=self._name) + def get_config(self): + """Returns the config of the optimizer. + + An optimizer config is a Python dictionary (serializable) + containing the configuration of an optimizer. + The same optimizer can be reinstantiated later + (without any saved state) from this configuration. + + Returns: + Python dictionary. + """ + return {"name": self._name} + + @classmethod + def from_config(cls, config, custom_objects=None): + """Creates an optimizer from its config. + + This method is the reverse of `get_config`, + capable of instantiating the same optimizer from the config + dictionary. + + Arguments: + config: A Python dictionary, typically the output of get_config. + custom_objects: A Python dictionary mapping names to additional Python + objects used to create this optimizer, such as a function used for a + hyperparameter. + + Returns: + An optimizer instance. + """ + return cls(**config) + + def _serialize_hyperparameter(self, hyperparameter_name): + """Serialize a hyperparameter that can be a float, callable, or Tensor.""" + return self._hyper[hyperparameter_name][1] + # -------------- # Unsupported parent methods # -------------- diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py index 2748d8eff7..9b9d0c12d5 100644 --- a/tensorflow/python/keras/optimizer_v2/rmsprop.py +++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py @@ -237,3 +237,14 @@ class RMSProp(optimizer_v2.OptimizerV2): grad, indices, use_locking=self._use_locking) + + def get_config(self): + config = super(RMSProp, self).get_config() + config.update({ + "learning_rate": self._serialize_hyperparameter("learning_rate"), + "rho": self._serialize_hyperparameter("rho"), + "momentum": self._serialize_hyperparameter("momentum"), + "epsilon": self._serialize_hyperparameter("epsilon"), + "centered": self._centered + }) + return config diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py 
b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py index 2c5eccdc5b..851c6b283e 100644 --- a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py +++ b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import copy import math +import types as python_types from absl.testing import parameterized import numpy as np @@ -439,6 +440,27 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))) ]), var1.eval()) + def testConfig(self): + + def momentum(): + return ops.convert_to_tensor(3.0) + + opt = rmsprop.RMSProp( + learning_rate=1.0, + rho=2.0, + momentum=momentum, + epsilon=lambda: ops.convert_to_tensor(4.0), + centered=True) + config = opt.get_config() + opt2 = rmsprop.RMSProp.from_config(config) + self.assertEqual(opt._hyper["learning_rate"][1], + opt2._hyper["learning_rate"][1]) + self.assertEqual(opt._hyper["rho"][1], opt2._hyper["rho"][1]) + self.assertEqual(opt._hyper["momentum"][1].__name__, + opt2._hyper["momentum"][1].__name__) + self.assertIsInstance(opt2._hyper["epsilon"][1], python_types.LambdaType) + self.assertEqual(True, opt2._centered) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/keras/optimizer_v2/sgd.py b/tensorflow/python/keras/optimizer_v2/sgd.py index f5583691f7..4bd0671735 100644 --- a/tensorflow/python/keras/optimizer_v2/sgd.py +++ b/tensorflow/python/keras/optimizer_v2/sgd.py @@ -168,3 +168,17 @@ class SGD(optimizer_v2.OptimizerV2): grad.values * state.get_hyper("learning_rate", var.dtype.base_dtype), grad.indices, grad.dense_shape) return var.scatter_sub(delta, use_locking=self._use_locking) + + def get_config(self): + config = super(SGD, self).get_config() + # Control whether momentum variables are created. 
+ if not self._use_momentum: + momentum = None + else: + momentum = self._serializer_hyperparameter("momentum") + config.update({ + "learning_rate": self._serialize_hyperparameter("learning_rate"), + "momentum": momentum, + "nesterov": self._use_nesterov + }) + return config diff --git a/tensorflow/python/keras/optimizer_v2/sgd_test.py b/tensorflow/python/keras/optimizer_v2/sgd_test.py index eb39aac283..b1dc4fab61 100644 --- a/tensorflow/python/keras/optimizer_v2/sgd_test.py +++ b/tensorflow/python/keras/optimizer_v2/sgd_test.py @@ -754,6 +754,20 @@ class MomentumOptimizerTest(test.TestCase): (0.9 * 0.01 + 0.01) * 2.0) ]), var1.eval()) + def testConfig(self): + opt = sgd.SGD(learning_rate=1.0, momentum=2.0, nesterov=True) + config = opt.get_config() + opt2 = sgd.SGD.from_config(config) + self.assertEqual(opt._hyper["learning_rate"][1], + opt2._hyper["learning_rate"][1]) + self.assertEqual(opt._hyper["momentum"][1], opt2._hyper["momentum"][1]) + self.assertEqual(opt2._use_nesterov, True) + + opt = sgd.SGD(momentum=None) + config = opt.get_config() + opt2 = sgd.SGD.from_config(config) + self.assertEqual(False, opt2._use_momentum) + if __name__ == "__main__": test.main() -- GitLab From 9fe6fe02a1f9ae89bdf395a4a092b62602eec36e Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Wed, 10 Oct 2018 09:58:31 -0700 Subject: [PATCH 202/411] Add comment about adding --define=with_xla_support=true build option for mnist_softmax_xla example. PiperOrigin-RevId: 216547149 --- tensorflow/examples/tutorials/mnist/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD index 99da44d6d5..a3dd927509 100644 --- a/tensorflow/examples/tutorials/mnist/BUILD +++ b/tensorflow/examples/tutorials/mnist/BUILD @@ -83,6 +83,8 @@ py_binary( ], ) +# Note: we need to add --define=with_xla_support=true to the build command in +# order to run the mnist_softmax_xla example with xla. 
py_binary( name = "mnist_softmax_xla", srcs = [ -- GitLab From e09ddb4290f74f053c6420d7bc140486b237404b Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 10 Oct 2018 10:05:49 -0700 Subject: [PATCH 203/411] Port the conditional control flow conversion to the new operators. PiperOrigin-RevId: 216548561 --- .../autograph/converters/control_flow.py | 31 ++++++---- .../autograph/operators/control_flow_test.py | 34 +++++++---- tensorflow/python/autograph/utils/BUILD | 11 ---- tensorflow/python/autograph/utils/__init__.py | 1 - .../autograph/utils/multiple_dispatch.py | 56 ------------------- .../autograph/utils/multiple_dispatch_test.py | 46 --------------- 6 files changed, 45 insertions(+), 134 deletions(-) delete mode 100644 tensorflow/python/autograph/utils/multiple_dispatch.py delete mode 100644 tensorflow/python/autograph/utils/multiple_dispatch_test.py diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py index 70879f6c97..e52e98f42a 100644 --- a/tensorflow/python/autograph/converters/control_flow.py +++ b/tensorflow/python/autograph/converters/control_flow.py @@ -49,12 +49,23 @@ class ControlFlowTransformer(converter.Base): def _create_cond_branch(self, body_name, aliased_orig_names, aliased_new_names, body, returns): + if len(returns) == 1: + template = """ + return retval + """ + return_stmt = templates.replace(template, retval=returns[0]) + else: + template = """ + return (retvals,) + """ + return_stmt = templates.replace(template, retvals=returns) + if aliased_orig_names: template = """ def body_name(): aliased_new_names, = aliased_orig_names, body - return (returns,) + return_stmt """ return templates.replace( template, @@ -62,20 +73,20 @@ class ControlFlowTransformer(converter.Base): body=body, aliased_orig_names=aliased_orig_names, aliased_new_names=aliased_new_names, - returns=returns) + return_stmt=return_stmt) else: template = """ def body_name(): body - return (returns,) + 
return_stmt """ return templates.replace( - template, body_name=body_name, body=body, returns=returns) + template, body_name=body_name, body=body, return_stmt=return_stmt) def _create_cond_expr(self, results, test, body_name, orelse_name): if results is not None: template = """ - results = ag__.utils.run_cond(test, body_name, orelse_name) + results = ag__.if_stmt(test, body_name, orelse_name) """ return templates.replace( template, @@ -85,7 +96,7 @@ class ControlFlowTransformer(converter.Base): orelse_name=orelse_name) else: template = """ - ag__.utils.run_cond(test, body_name, orelse_name) + ag__.if_stmt(test, body_name, orelse_name) """ return templates.replace( template, test=test, body_name=body_name, orelse_name=orelse_name) @@ -111,7 +122,7 @@ class ControlFlowTransformer(converter.Base): elif s.is_composite(): # Special treatment for compound objects: if any of their owner entities # are live, then they are outputs as well. - if any(owner in live_out for owner in s.owner_set): + if live_out & s.owner_set: returned_from_cond.add(s) need_alias_in_body = body_scope.modified & defined_in @@ -152,7 +163,6 @@ class ControlFlowTransformer(converter.Base): returned_from_cond = tuple(returned_from_cond) if returned_from_cond: if len(returned_from_cond) == 1: - # TODO(mdan): Move this quirk into the operator implementation. cond_results = returned_from_cond[0] else: cond_results = gast.Tuple([s.ast() for s in returned_from_cond], None) @@ -171,8 +181,9 @@ class ControlFlowTransformer(converter.Base): # actually has some return value as well. cond_results = None # TODO(mdan): This doesn't belong here; it's specific to the operator. 
- returned_from_body = templates.replace_as_expression('tf.constant(1)') - returned_from_orelse = templates.replace_as_expression('tf.constant(1)') + returned_from_body = (templates.replace_as_expression('tf.constant(1)'),) + returned_from_orelse = ( + templates.replace_as_expression('tf.constant(1)'),) body_name = self.ctx.namer.new_symbol('if_true', body_scope.referenced) orelse_name = self.ctx.namer.new_symbol('if_false', orelse_scope.referenced) diff --git a/tensorflow/python/autograph/operators/control_flow_test.py b/tensorflow/python/autograph/operators/control_flow_test.py index bb214b6f16..2dea18dc5f 100644 --- a/tensorflow/python/autograph/operators/control_flow_test.py +++ b/tensorflow/python/autograph/operators/control_flow_test.py @@ -80,20 +80,34 @@ class WhileLoopTest(test.TestCase): class IfStmtTest(test.TestCase): - def test_tensor(self): - def test_if_stmt(cond): - return control_flow.if_stmt( - cond=cond, - body=lambda: 1, - orelse=lambda: -1) + def single_return_if_stmt(self, cond): + return control_flow.if_stmt(cond=cond, body=lambda: 1, orelse=lambda: -1) + + def multi_return_if_stmt(self, cond): + return control_flow.if_stmt( + cond=cond, body=lambda: (1, 2), orelse=lambda: (-1, -2)) + def test_tensor(self): with self.cached_session() as sess: - self.assertEqual(1, sess.run(test_if_stmt(constant_op.constant(True)))) - self.assertEqual(-1, sess.run(test_if_stmt(constant_op.constant(False)))) + t = self.single_return_if_stmt(constant_op.constant(True)) + self.assertEqual(1, sess.run(t)) + t = self.single_return_if_stmt(constant_op.constant(False)) + self.assertEqual(-1, sess.run(t)) def test_python(self): - self.assertEqual(1, control_flow.if_stmt(True, lambda: 1, lambda: -1)) - self.assertEqual(-1, control_flow.if_stmt(False, lambda: 1, lambda: -1)) + self.assertEqual(1, self.single_return_if_stmt(True)) + self.assertEqual(-1, self.single_return_if_stmt(False)) + + def test_tensor_multiple_returns(self): + with self.cached_session() as sess: + 
t = self.multi_return_if_stmt(constant_op.constant(True)) + self.assertAllEqual([1, 2], sess.run(t)) + t = self.multi_return_if_stmt(constant_op.constant(False)) + self.assertAllEqual([-1, -2], sess.run(t)) + + def test_python_multiple_returns(self): + self.assertEqual((1, 2), self.multi_return_if_stmt(True)) + self.assertEqual((-1, -2), self.multi_return_if_stmt(False)) if __name__ == '__main__': diff --git a/tensorflow/python/autograph/utils/BUILD b/tensorflow/python/autograph/utils/BUILD index 22451d4f3f..790c661661 100644 --- a/tensorflow/python/autograph/utils/BUILD +++ b/tensorflow/python/autograph/utils/BUILD @@ -22,7 +22,6 @@ py_library( "__init__.py", "context_managers.py", "misc.py", - "multiple_dispatch.py", "py_func.py", "tensor_list.py", "tensors.py", @@ -61,16 +60,6 @@ py_test( ], ) -py_test( - name = "multiple_dispatch_test", - srcs = ["multiple_dispatch_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:client_testlib", - ], -) - py_test( name = "py_func_test", srcs = ["py_func_test.py"], diff --git a/tensorflow/python/autograph/utils/__init__.py b/tensorflow/python/autograph/utils/__init__.py index c781958481..d9031159b1 100644 --- a/tensorflow/python/autograph/utils/__init__.py +++ b/tensorflow/python/autograph/utils/__init__.py @@ -20,7 +20,6 @@ from __future__ import print_function from tensorflow.python.autograph.utils.context_managers import control_dependency_on_returns from tensorflow.python.autograph.utils.misc import alias_tensors -from tensorflow.python.autograph.utils.multiple_dispatch import run_cond from tensorflow.python.autograph.utils.py_func import wrap_py_func from tensorflow.python.autograph.utils.tensor_list import dynamic_list_append from tensorflow.python.autograph.utils.testing import fake_tf diff --git a/tensorflow/python/autograph/utils/multiple_dispatch.py b/tensorflow/python/autograph/utils/multiple_dispatch.py deleted file mode 100644 index 107c8f7a68..0000000000 --- 
a/tensorflow/python/autograph/utils/multiple_dispatch.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities for type-dependent behavior used in autograph-generated code.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.autograph.utils.type_check import is_tensor -from tensorflow.python.ops import control_flow_ops - - -def run_cond(condition, true_fn, false_fn): - """Type-dependent functional conditional. - - Args: - condition: A Tensor or Python bool. - true_fn: A Python callable implementing the true branch of the conditional. - false_fn: A Python callable implementing the false branch of the - conditional. - - Returns: - result: The result of calling the appropriate branch. If condition is a - Tensor, tf.cond will be used. Otherwise, a standard Python if statement will - be ran. 
- """ - if is_tensor(condition): - return control_flow_ops.cond(condition, true_fn, false_fn) - else: - return py_cond(condition, true_fn, false_fn) - - -def py_cond(condition, true_fn, false_fn): - """Functional version of Python's conditional.""" - if condition: - results = true_fn() - else: - results = false_fn() - - # The contract for the branch functions is to return tuples, but they should - # be collapsed to a single element when there is only one output. - if len(results) == 1: - return results[0] - return results diff --git a/tensorflow/python/autograph/utils/multiple_dispatch_test.py b/tensorflow/python/autograph/utils/multiple_dispatch_test.py deleted file mode 100644 index 2a77c895ce..0000000000 --- a/tensorflow/python/autograph/utils/multiple_dispatch_test.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for multiple_dispatch.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.autograph.utils import multiple_dispatch -from tensorflow.python.client.session import Session -from tensorflow.python.framework.constant_op import constant -from tensorflow.python.platform import test - - -class MultipleDispatchTest(test.TestCase): - - def test_run_cond_python(self): - true_fn = lambda: (2,) - false_fn = lambda: (3,) - self.assertEqual(multiple_dispatch.run_cond(True, true_fn, false_fn), 2) - self.assertEqual(multiple_dispatch.run_cond(False, true_fn, false_fn), 3) - - def test_run_cond_tf(self): - true_fn = lambda: (constant(2),) - false_fn = lambda: (constant(3),) - with Session() as sess: - out = multiple_dispatch.run_cond(constant(True), true_fn, false_fn) - self.assertEqual(sess.run(out), 2) - out = multiple_dispatch.run_cond(constant(False), true_fn, false_fn) - self.assertEqual(sess.run(out), 3) - - -if __name__ == '__main__': - test.main() -- GitLab From c602fc061ae817ba09cd9aed35f955f45955206f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 10:16:38 -0700 Subject: [PATCH 204/411] Improve shape inference for tf.eye. 
PiperOrigin-RevId: 216550243 --- tensorflow/python/kernel_tests/BUILD | 1 + .../python/kernel_tests/linalg_ops_test.py | 173 ++++++++++++------ tensorflow/python/ops/linalg_ops_impl.py | 27 ++- 3 files changed, 137 insertions(+), 64 deletions(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 4e8639dfc8..cc6fbf26c2 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1785,6 +1785,7 @@ cuda_py_test( size = "medium", srcs = ["linalg_ops_test.py"], additional_deps = [ + "@absl_py//absl/testing:parameterized", "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py index aa17f727d0..ccb3feeaf6 100644 --- a/tensorflow/python/kernel_tests/linalg_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg_ops_test.py @@ -18,6 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import itertools + +from absl.testing import parameterized import numpy as np from tensorflow.python.framework import dtypes @@ -52,7 +55,7 @@ class CholeskySolveTest(test.TestCase): def test_works_with_five_different_random_pos_def_matrices(self): for n in range(1, 6): for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]: - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): # Create 2 x n x n matrix array = np.array( [_RandomPDMatrix(n, self.rng), @@ -76,7 +79,7 @@ class LogdetTest(test.TestCase): (np.complex64, 0.05), (np.complex128, 1e-5)]: matrix = _RandomPDMatrix(n, self.rng, np_dtype) _, logdet_np = np.linalg.slogdet(matrix) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): # Create 2 x n x n matrix # matrix = np.array( # [_RandomPDMatrix(n, self.rng, np_dtype), @@ -89,7 +92,7 @@ class LogdetTest(test.TestCase): (np.complex64, 0.05), 
(np.complex128, 1e-5)]: matrix = (np.eye(20) * 1e-6).astype(np_dtype) _, logdet_np = np.linalg.slogdet(matrix) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): logdet_tf = linalg.logdet(matrix) self.assertAllClose(logdet_np, logdet_tf.eval(), atol=atol) @@ -105,7 +108,7 @@ class SlogdetTest(test.TestCase): (np.complex64, 0.05), (np.complex128, 1e-5)]: matrix = _RandomPDMatrix(n, self.rng, np_dtype) sign_np, log_abs_det_np = np.linalg.slogdet(matrix) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): sign_tf, log_abs_det_tf = linalg.slogdet(matrix) self.assertAllClose(log_abs_det_np, log_abs_det_tf.eval(), atol=atol) self.assertAllClose(sign_np, sign_tf.eval(), atol=atol) @@ -115,7 +118,7 @@ class SlogdetTest(test.TestCase): (np.complex64, 0.05), (np.complex128, 1e-5)]: matrix = (np.eye(20) * 1e-6).astype(np_dtype) sign_np, log_abs_det_np = np.linalg.slogdet(matrix) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): sign_tf, log_abs_det_tf = linalg.slogdet(matrix) self.assertAllClose(log_abs_det_np, log_abs_det_tf.eval(), atol=atol) self.assertAllClose(sign_np, sign_tf.eval(), atol=atol) @@ -128,66 +131,126 @@ class AdjointTest(test.TestCase): matrix_np = np.array([[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j, 6 + 6j]]).astype(dtype) expected_transposed = np.conj(matrix_np.T) - with self.cached_session(): + with self.session(): matrix = ops.convert_to_tensor(matrix_np) transposed = linalg.adjoint(matrix) self.assertEqual((3, 2), transposed.get_shape()) self.assertAllEqual(expected_transposed, transposed.eval()) -class EyeTest(test.TestCase): - pass # Will be filled in below - - -def _GetEyeTest(num_rows, num_columns, batch_shape, dtype): - - def Test(self): +class EyeTest(parameterized.TestCase, test.TestCase): + + def testShapeInferenceNoBatch(self): + self.assertEqual((2, 2), linalg_ops.eye(num_rows=2).shape) + self.assertEqual((2, 3), linalg_ops.eye(num_rows=2, num_columns=3).shape) + + def 
testShapeInferenceStaticBatch(self): + batch_shape = (2, 3) + self.assertEqual( + (2, 3, 2, 2), + linalg_ops.eye(num_rows=2, batch_shape=batch_shape).shape) + self.assertEqual( + (2, 3, 2, 3), + linalg_ops.eye( + num_rows=2, num_columns=3, batch_shape=batch_shape).shape) + + @parameterized.named_parameters( + ("DynamicRow", array_ops.placeholder_with_default(2, shape=None), None), + ("DynamicRowStaticColumn", + array_ops.placeholder_with_default(2, shape=None), + 3), + ("StaticRowDynamicColumn", + 2, + array_ops.placeholder_with_default(3, shape=None)), + ("DynamicRowDynamicColumn", + array_ops.placeholder_with_default(2, shape=None), + array_ops.placeholder_with_default(3, shape=None))) + def testShapeInferenceStaticBatchWith(self, num_rows, num_columns): + batch_shape = (2, 3) + identity_matrix = linalg_ops.eye( + num_rows=num_rows, + num_columns=num_columns, + batch_shape=batch_shape) + self.assertEqual(4, identity_matrix.shape.ndims) + self.assertEqual((2, 3), identity_matrix.shape[:2]) + if num_rows is not None and not isinstance(num_rows, ops.Tensor): + self.assertEqual(2, identity_matrix.shape[-2]) + + if num_columns is not None and not isinstance(num_columns, ops.Tensor): + self.assertEqual(3, identity_matrix.shape[-1]) + + @parameterized.parameters( + itertools.product( + # num_rows + [0, 1, 2, 5], + # num_columns + [None, 0, 1, 2, 5], + # batch_shape + [None, [], [2], [2, 3]], + # dtype + [ + dtypes.int32, + dtypes.int64, + dtypes.float32, + dtypes.float64, + dtypes.complex64, + dtypes.complex128 + ]) + ) + def test_eye_no_placeholder(self, num_rows, num_columns, batch_shape, dtype): eye_np = np.eye(num_rows, M=num_columns, dtype=dtype.as_numpy_dtype) if batch_shape is not None: eye_np = np.tile(eye_np, batch_shape + [1, 1]) - for use_placeholder in False, True: - if use_placeholder and (num_columns is None or batch_shape is None): - return - with self.test_session(use_gpu=True) as sess: - if use_placeholder: - num_rows_placeholder = 
array_ops.placeholder( - dtypes.int32, name="num_rows") - num_columns_placeholder = array_ops.placeholder( - dtypes.int32, name="num_columns") - batch_shape_placeholder = array_ops.placeholder( - dtypes.int32, name="batch_shape") - eye = linalg_ops.eye( - num_rows_placeholder, - num_columns=num_columns_placeholder, - batch_shape=batch_shape_placeholder, - dtype=dtype) - eye_tf = sess.run( - eye, - feed_dict={ - num_rows_placeholder: num_rows, - num_columns_placeholder: num_columns, - batch_shape_placeholder: batch_shape - }) - else: - eye_tf = linalg_ops.eye( - num_rows, - num_columns=num_columns, - batch_shape=batch_shape, - dtype=dtype).eval() - self.assertAllEqual(eye_np, eye_tf) - - return Test + eye_tf = self.evaluate(linalg_ops.eye( + num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype)) + self.assertAllEqual(eye_np, eye_tf) + + @parameterized.parameters( + itertools.product( + # num_rows + [0, 1, 2, 5], + # num_columns + [0, 1, 2, 5], + # batch_shape + [[], [2], [2, 3]], + # dtype + [ + dtypes.int32, + dtypes.int64, + dtypes.float32, + dtypes.float64, + dtypes.complex64, + dtypes.complex128 + ]) + ) + def test_eye_with_placeholder( + self, num_rows, num_columns, batch_shape, dtype): + eye_np = np.eye(num_rows, M=num_columns, dtype=dtype.as_numpy_dtype) + eye_np = np.tile(eye_np, batch_shape + [1, 1]) + num_rows_placeholder = array_ops.placeholder( + dtypes.int32, name="num_rows") + num_columns_placeholder = array_ops.placeholder( + dtypes.int32, name="num_columns") + batch_shape_placeholder = array_ops.placeholder( + dtypes.int32, name="batch_shape") + eye = linalg_ops.eye( + num_rows_placeholder, + num_columns=num_columns_placeholder, + batch_shape=batch_shape_placeholder, + dtype=dtype) + with self.session(use_gpu=True) as sess: + eye_tf = sess.run( + eye, + feed_dict={ + num_rows_placeholder: num_rows, + num_columns_placeholder: num_columns, + batch_shape_placeholder: batch_shape + }) + self.assertAllEqual(eye_np, eye_tf) if 
__name__ == "__main__": - for _num_rows in 0, 1, 2, 5: - for _num_columns in None, 0, 1, 2, 5: - for _batch_shape in None, [], [2], [2, 3]: - for _dtype in (dtypes.int32, dtypes.int64, dtypes.float32, - dtypes.float64, dtypes.complex64, dtypes.complex128): - name = "dtype_%s_num_rows_%s_num_column_%s_batch_shape_%s_" % ( - _dtype.name, _num_rows, _num_columns, _batch_shape) - _AddTest(EyeTest, "EyeTest", name, - _GetEyeTest(_num_rows, _num_columns, _batch_shape, _dtype)) - test.main() diff --git a/tensorflow/python/ops/linalg_ops_impl.py b/tensorflow/python/ops/linalg_ops_impl.py index e7c89f6ae3..37c724e032 100644 --- a/tensorflow/python/ops/linalg_ops_impl.py +++ b/tensorflow/python/ops/linalg_ops_impl.py @@ -44,22 +44,31 @@ def eye(num_rows, is_square = num_columns is None batch_shape = [] if batch_shape is None else batch_shape num_columns = num_rows if num_columns is None else num_columns - if isinstance(num_rows, ops.Tensor) or isinstance( - num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor): - batch_shape = ops.convert_to_tensor( - batch_shape, name='shape', dtype=dtypes.int32) + + # We cannot statically infer what the diagonal size should be: + if (isinstance(num_rows, ops.Tensor) or + isinstance(num_columns, ops.Tensor)): diag_size = math_ops.minimum(num_rows, num_columns) - diag_shape = array_ops.concat((batch_shape, [diag_size]), 0) - if not is_square: - shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0) else: + # We can statically infer the diagonal size, and whether it is square. if not isinstance(num_rows, compat.integral_types) or not isinstance( num_columns, compat.integral_types): raise TypeError( 'num_rows and num_columns must be positive integer values.') - batch_shape = [dim for dim in batch_shape] is_square = num_rows == num_columns - diag_shape = batch_shape + [np.minimum(num_rows, num_columns)] + diag_size = np.minimum(num_rows, num_columns) + + # We can not statically infer the shape of the tensor. 
+ if isinstance(batch_shape, ops.Tensor) or isinstance(diag_size, ops.Tensor): + batch_shape = ops.convert_to_tensor( + batch_shape, name='shape', dtype=dtypes.int32) + diag_shape = array_ops.concat((batch_shape, [diag_size]), axis=0) + if not is_square: + shape = array_ops.concat((batch_shape, [num_rows, num_columns]), axis=0) + # We can statically infer everything. + else: + batch_shape = list(batch_shape) + diag_shape = batch_shape + [diag_size] if not is_square: shape = batch_shape + [num_rows, num_columns] -- GitLab From 60a0bfeb389e490e80d2effd1e518c7953783ac7 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 10 Oct 2018 10:20:21 -0700 Subject: [PATCH 205/411] Remove the tensorflow import from generated code. PiperOrigin-RevId: 216550899 --- tensorflow/python/autograph/core/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/autograph/core/config.py b/tensorflow/python/autograph/core/config.py index 4fa8489af5..574f819504 100644 --- a/tensorflow/python/autograph/core/config.py +++ b/tensorflow/python/autograph/core/config.py @@ -45,5 +45,4 @@ NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) # TODO(mdan); Consolidate all internal imports into a single __ag module. 
COMPILED_IMPORT_STATEMENTS = ( 'from __future__ import print_function', - 'import tensorflow as tf', ) -- GitLab From 4f0caab261e26178c8b53080055fdddd046c0d6a Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 10 Oct 2018 10:26:57 -0700 Subject: [PATCH 206/411] Switch to code style, keep link to tensorflow.org --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 34406f4ed7..44511c9510 100644 --- a/README.md +++ b/README.md @@ -31,15 +31,17 @@ subscribing to ## Installation for install current release for CPU-only: -*pip install tensorflow* +``` +pip install tensorflow +``` GPU package for CUDA-enabled GPU cards: -*pip install tensorflow-gpu* - - - +``` +pip install tensorflow-gpu +``` +*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.* People who are a little more adventurous can also try our nightly binaries: -- GitLab From 83976f270ceb1cbe17a1fbf3a8e945748537886e Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 10 Oct 2018 10:28:54 -0700 Subject: [PATCH 207/411] Wording. --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 44511c9510..99321b294e 100644 --- a/README.md +++ b/README.md @@ -29,19 +29,20 @@ subscribing to [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). 
## Installation -for install current release for CPU-only: + +To install the current release for CPU-only: ``` pip install tensorflow ``` -GPU package for CUDA-enabled GPU cards: +Use the GPU package for CUDA-enabled GPU cards: ``` pip install tensorflow-gpu ``` -*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.* +*See [Installing TensorFlow](https://www.tensorflow.org/install) for detailed instructions, and how to build from source.* People who are a little more adventurous can also try our nightly binaries: -- GitLab From 1e69efe803df50dc00174da37cda9b8147d886f3 Mon Sep 17 00:00:00 2001 From: Tamara Norman Date: Wed, 10 Oct 2018 10:21:56 -0700 Subject: [PATCH 208/411] Remove unused build dependency in gradients_impl on backprop in order to allow reverse dependency. PiperOrigin-RevId: 216551226 --- tensorflow/python/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 822d596995..18ade384f5 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2178,7 +2178,6 @@ py_library( ":util", ":variable_scope", "//tensorflow/core:protos_all_py", - "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", "//tensorflow/python/eager:tape", "//third_party/py/numpy", -- GitLab From dbac4acb330663c4a3b8a167b9f83c5b9acc95fe Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 10 Oct 2018 10:31:56 -0700 Subject: [PATCH 209/411] [XLA] Make sure that the multi-output path of BF16 normalization updates roots. 
PiperOrigin-RevId: 216553003 --- .../xla/service/bfloat16_normalization.cc | 4 ++++ .../service/bfloat16_normalization_test.cc | 24 +++++++++++++++++++ .../compiler/xla/service/hlo_verifier.cc | 6 ++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc index d5b1148058..1251f0258f 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc @@ -231,6 +231,10 @@ Status BFloat16NormalizationVisitor::HandleMultipleOutputs( for (auto* user : materialized_users) { TF_RETURN_IF_ERROR(hlo->ReplaceUseWith(user, tuple)); } + bool is_root = computation_->root_instruction() == hlo; + if (is_root) { + computation_->set_root_instruction(tuple); + } *tuple->mutable_shape() = original_shape; return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc index 2411fdcb20..cb075a5e38 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -298,6 +298,30 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSort) { EXPECT_EQ(ShapeUtil::GetSubshape(sort->shape(), {0}).element_type(), F32); } +TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSortRoot) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {1024}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {1024}); + + HloInstruction* key = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "key")); + HloInstruction* value = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "value")); + + HloInstruction* sort = builder.AddInstruction(HloInstruction::CreateSort( + 
ShapeUtil::MakeTupleShape({bf16_shape, bf16_shape}), 0, key, {value})); + + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(Normalize(module)); + + EXPECT_EQ(sort->operand(0)->shape().element_type(), F32); + EXPECT_EQ(ShapeUtil::GetSubshape(sort->shape(), {0}).element_type(), F32); + EXPECT_NE(computation->root_instruction(), sort); + EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kTuple); +} + // Tests that the normalization should not cause unsupported mixed precision due // to resolving unsupported BF16 operand. TEST_F(BFloat16NormalizationTest, DoNotAddUnsupportedMixedPrecision) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index a1f668921d..912d2dbe75 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -878,10 +878,8 @@ Status CheckEntryComputationLayout(const HloModule& module) { const HloComputation* computation = module.entry_computation(); const auto& layout = module.entry_computation_layout(); - // TODO(117498192): Change into a call to Compatible(...). - if (!ShapeUtil::CompatibleIgnoringFpPrecision( - computation->root_instruction()->shape(), - layout.result_layout().shape())) { + if (!ShapeUtil::Compatible(computation->root_instruction()->shape(), + layout.result_layout().shape())) { return InternalError( "Shape of the root instruction of entry computation (%s) should be " "compatible to one specified in module's entry computation layout (%s)", -- GitLab From 0445f420981524a52b87ce7de74d7c0c39177cd6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 11:12:34 -0700 Subject: [PATCH 210/411] Call InitMain in tflite_diff_example_test. 
PiperOrigin-RevId: 216560608 --- tensorflow/contrib/lite/testing/BUILD | 2 ++ tensorflow/contrib/lite/testing/tflite_diff_example_test.cc | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 3dc666f631..b476445b3a 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -350,6 +350,7 @@ tf_cc_test( deps = [ ":tflite_diff_flags", ":tflite_diff_util", + "//tensorflow/core:lib", ], ) @@ -359,6 +360,7 @@ cc_binary( deps = [ ":tflite_diff_flags", ":tflite_diff_util", + "//tensorflow/core:lib", ], ) diff --git a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc index f2c49fe389..e85d9c525a 100644 --- a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc +++ b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc @@ -15,12 +15,15 @@ limitations under the License. #include "tensorflow/contrib/lite/testing/tflite_diff_flags.h" #include "tensorflow/contrib/lite/testing/tflite_diff_util.h" +#include "tensorflow/core/platform/init_main.h" int main(int argc, char** argv) { ::tflite::testing::DiffOptions options = ::tflite::testing::ParseTfliteDiffFlags(&argc, argv); if (options.tensorflow_model.empty()) return 1; + ::tensorflow::port::InitMain("usage", &argc, &argv); + int failure_count = 0; for (int i = 0; i < options.num_runs_per_pass; i++) { if (!tflite::testing::RunDiffTest(options, /*num_invocations=*/1)) { -- GitLab From 4a433a5b273a32fc7f87a32a7245dc1a708dfc33 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 10 Oct 2018 11:13:31 -0700 Subject: [PATCH 211/411] Automated rollback of commit 6c40bc717442d56f0b6a60658b05f0549afd69ee. 
PiperOrigin-RevId: 216560788 --- .../contrib/losses/python/losses/loss_ops.py | 14 ++++-- .../contrib/metrics/python/ops/metric_ops.py | 48 ++++++++++++------- tensorflow/contrib/rate/rate.py | 11 ++--- tensorflow/python/keras/metrics.py | 28 +++++++---- tensorflow/python/kernel_tests/losses_test.py | 15 ------ tensorflow/python/ops/losses/losses_impl.py | 22 ++++++--- tensorflow/python/ops/metrics_impl.py | 42 ++++++++++------ 7 files changed, 104 insertions(+), 76 deletions(-) diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 651de4e2f4..619294b518 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -22,6 +22,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.framework.python.ops import add_arg_scope +from tensorflow.python.compat import compat from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -82,6 +83,8 @@ def _safe_div(numerator, denominator, name="value"): Returns: The element-wise value of the numerator divided by the denominator. """ + if compat.forward_compatible(2018, 11, 1): + return math_ops.div_no_nan(numerator, denominator, name=name) return array_ops.where( math_ops.greater(denominator, 0), math_ops.div(numerator, @@ -104,7 +107,7 @@ def _safe_mean(losses, num_present): then zero is returned. 
""" total_loss = math_ops.reduce_sum(losses) - return _safe_div(total_loss, num_present) + return _safe_div(total_loss, num_present, name="value") @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.") @@ -609,11 +612,14 @@ def mean_pairwise_squared_error(predictions, math_ops.square(diffs), reduction_indices=reduction_indices) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, num_present_per_batch) + term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, + num_present_per_batch, + name="value") sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) - term2 = 2.0 * _safe_div( - math_ops.square(sum_diff), math_ops.square(num_present_per_batch)) + term2 = 2.0 * _safe_div(math_ops.square(sum_diff), + math_ops.square(num_present_per_batch), + name="value") loss = _scale_losses(term1 - term2, weights) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index bbf5d3f30c..d6932f6e4b 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -24,6 +24,7 @@ from __future__ import print_function import collections as collections_lib +from tensorflow.python.compat import compat from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -45,22 +46,30 @@ from tensorflow.python.util.deprecation import deprecated _EPSILON = 1e-7 -def _safe_div(numerator, denominator, name): - """Divides two values, returning 0 if the denominator is <= 0. +def _safe_div(numerator, denominator): + """Computes a safe divide which returns 0 if the denominator is zero. + + Note that the function contains an additional conditional check that is + necessary for avoiding situations where the loss is zero causing NaNs to + creep into the gradient computation. 
Args: - numerator: A real `Tensor`. - denominator: A real `Tensor`, with dtype matching `numerator`. - name: Name for the returned op. + numerator: An arbitrary `Tensor`. + denominator: A `Tensor` whose shape matches `numerator` and whose values are + assumed to be non-negative. Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` + The element-wise value of the numerator divided by the denominator. """ + if compat.forward_compatible(2018, 11, 1): + return math_ops.div_no_nan(numerator, denominator) return array_ops.where( math_ops.greater(denominator, 0), - math_ops.truediv(numerator, denominator), - 0, - name=name) + math_ops.div(numerator, + array_ops.where( + math_ops.equal(denominator, 0), + array_ops.ones_like(denominator), denominator)), + array_ops.zeros_like(numerator)) @deprecated(None, 'Please switch to tf.metrics.true_positives. Note that the ' @@ -3239,11 +3248,11 @@ def streaming_covariance(predictions, # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount) # batch_mean_prediction is E[x_B] in the update equation batch_mean_prediction = _safe_div( - math_ops.reduce_sum(weighted_predictions), batch_count, - 'batch_mean_prediction') + math_ops.reduce_sum(weighted_predictions), + batch_count) delta_mean_prediction = _safe_div( - (batch_mean_prediction - mean_prediction) * batch_count, update_count, - 'delta_mean_prediction') + (batch_mean_prediction - mean_prediction) * batch_count, + update_count) update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) # prev_mean_prediction is E[x_A] in the update equation @@ -3251,9 +3260,11 @@ def streaming_covariance(predictions, # batch_mean_label is E[y_B] in the update equation batch_mean_label = _safe_div( - math_ops.reduce_sum(weighted_labels), batch_count, 'batch_mean_label') - delta_mean_label = _safe_div((batch_mean_label - mean_label) * batch_count, - update_count, 'delta_mean_label') + math_ops.reduce_sum(weighted_labels), + batch_count) + 
delta_mean_label = _safe_div( + (batch_mean_label - mean_label) * batch_count, + update_count) update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation prev_mean_label = update_mean_label - delta_mean_label @@ -3915,8 +3926,9 @@ def cohen_kappa(labels, po_sum = math_ops.reduce_sum(po) total = math_ops.reduce_sum(pe_row) pe_sum = math_ops.reduce_sum( - metrics_impl._safe_div( # pylint: disable=protected-access - pe_row * pe_col, total, None)) + _safe_div( + math_ops.to_double(pe_row * pe_col), + math_ops.to_double(total))) po_sum, pe_sum, total = (math_ops.to_double(po_sum), math_ops.to_double(pe_sum), math_ops.to_double(total)) diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py index 24d586479a..d948066b36 100644 --- a/tensorflow/contrib/rate/rate.py +++ b/tensorflow/contrib/rate/rate.py @@ -108,13 +108,6 @@ class Rate(object): def variables(self): return self._vars - def _safe_div(self, numerator, denominator, name): - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero, name=name) - def _add_variable(self, name, shape=None, dtype=None): """Private method for adding variables to the graph.""" if self._built: @@ -148,4 +141,6 @@ class Rate(object): state_ops.assign(self.prev_values, values) state_ops.assign(self.prev_denominator, denominator) - return self._safe_div(self.numer, self.denom, name="safe_rate") + return math_ops.div_no_nan(self.numer, + math_ops.maximum(self.denom, 0), + name="safe_rate") diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index d217244e2f..920eaf5596 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -28,6 +28,7 @@ import types import weakref import six +from tensorflow.python.compat import 
compat from tensorflow.python.eager import context from tensorflow.python.eager import function from tensorflow.python.framework import dtypes @@ -172,20 +173,29 @@ def weakmethod(method): def safe_div(numerator, denominator): - """Divides two tensors element-wise, returning 0 if the denominator is <= 0. + """Computes a safe divide which returns 0 if the denominator is zero. + + Note that the function contains an additional conditional check that is + necessary for avoiding situations where the loss is zero causing NaNs to + creep into the gradient computation. Args: - numerator: A `Tensor`. - denominator: A `Tensor`, with dtype matching `numerator`. + numerator: An arbitrary `Tensor`. + denominator: A `Tensor` whose shape matches `numerator` and whose values are + assumed to be non-negative. Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` + The element-wise value of the numerator divided by the denominator. """ - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero) + if compat.forward_compatible(2018, 11, 1): + return math_ops.div_no_nan(numerator, denominator) + return array_ops.where( + math_ops.greater(denominator, 0), + math_ops.div(numerator, + array_ops.where( + math_ops.equal(denominator, 0), + array_ops.ones_like(denominator), denominator)), + array_ops.zeros_like(numerator)) def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight): diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py index 3ce0b74263..fb0b5f1137 100644 --- a/tensorflow/python/kernel_tests/losses_test.py +++ b/tensorflow/python/kernel_tests/losses_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np -from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from 
tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl @@ -34,25 +33,11 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses -from tensorflow.python.ops.losses import losses_impl from tensorflow.python.ops.losses import util from tensorflow.python.platform import test from tensorflow.python.training import momentum as momentum_lib -safe_div = losses_impl._safe_div # pylint: disable=protected-access - - -class SafeDivTest(test.TestCase): - - def testEager(self): - with context.eager_mode(): - self.assertAllEqual(safe_div(constant_op.constant(1.0), - constant_op.constant(0.0)), 0.0) - self.assertAllEqual(safe_div(constant_op.constant(1.0), - 0.0), 0.0) - - class AbsoluteDifferenceLossTest(test.TestCase): def setUp(self): diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 806539747e..8a8a81ab5c 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.compat import compat from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -83,18 +84,21 @@ def _safe_div(numerator, denominator, name="value"): Args: numerator: An arbitrary `Tensor`. - denominator: `Tensor` whose shape matches `numerator` and whose values are + denominator: A `Tensor` whose shape matches `numerator` and whose values are assumed to be non-negative. name: An optional name for the returned op. Returns: The element-wise value of the numerator divided by the denominator. 
""" + if compat.forward_compatible(2018, 11, 1): + return math_ops.div_no_nan(numerator, denominator, name=name) return array_ops.where( math_ops.greater(denominator, 0), - math_ops.div(numerator, array_ops.where( - math_ops.equal(denominator, 0), - array_ops.ones_like(denominator), denominator)), + math_ops.div(numerator, + array_ops.where( + math_ops.equal(denominator, 0), + array_ops.ones_like(denominator), denominator)), array_ops.zeros_like(numerator), name=name) @@ -599,14 +603,18 @@ def mean_pairwise_squared_error( keepdims=True) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, - num_present_per_batch - 1) + term1 = 2.0 * _safe_div( + sum_squares_diff_per_batch, + math_ops.maximum(num_present_per_batch - 1, 0)) sum_diff = math_ops.reduce_sum( diffs, reduction_indices=reduction_indices, keepdims=True) term2 = 2.0 * _safe_div( math_ops.square(sum_diff), - math_ops.multiply(num_present_per_batch, num_present_per_batch - 1)) + math_ops.maximum( + math_ops.multiply(num_present_per_batch, + num_present_per_batch - 1), + 0)) weighted_losses = math_ops.multiply(term1 - term2, weights) loss = math_ops.reduce_sum(weighted_losses) diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 763877c2d2..b8d96b4a6e 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.compat import compat from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -224,6 +225,8 @@ def _safe_div(numerator, denominator, name): Returns: 0 if `denominator` <= 0, else `numerator` / `denominator` """ + if compat.forward_compatible(2018, 11, 1): + return math_ops.div_no_nan(numerator, denominator) t = 
math_ops.truediv(numerator, denominator) zero = array_ops.zeros_like(t, dtype=denominator.dtype) condition = math_ops.greater(denominator, zero) @@ -244,12 +247,7 @@ def _safe_scalar_div(numerator, denominator, name): """ numerator.get_shape().with_rank_at_most(1) denominator.get_shape().with_rank_at_most(1) - return control_flow_ops.cond( - math_ops.equal( - array_ops.constant(0.0, dtype=dtypes.float64), denominator), - lambda: array_ops.constant(0.0, dtype=dtypes.float64), - lambda: math_ops.div(numerator, denominator), - name=name) + return _safe_div(numerator, denominator, name=name) def _streaming_confusion_matrix(labels, predictions, num_classes, weights=None): @@ -402,11 +400,14 @@ def mean(values, with ops.control_dependencies([values]): update_count_op = state_ops.assign_add(count, num_values) - compute_mean = lambda _, t, c: _safe_div(t, c, 'value') + def compute_mean(_, t, c): + return _safe_div(t, math_ops.maximum(c, 0), name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = _safe_div(update_total_op, update_count_op, 'update_op') + update_op = _safe_div(update_total_op, + math_ops.maximum(update_count_op, 0), + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -778,16 +779,21 @@ def auc(labels, """ dtp = tp[:num_thresholds - 1] - tp[1:] p = tp + fp - prec_slope = _safe_div(dtp, p[:num_thresholds - 1] - p[1:], 'prec_slope') + prec_slope = _safe_div( + dtp, + math_ops.maximum(p[:num_thresholds - 1] - p[1:], 0), + name='prec_slope') intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:]) safe_p_ratio = array_ops.where( math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0), - _safe_div(p[:num_thresholds - 1], p[1:], 'recall_relative_ratio'), + _safe_div(p[:num_thresholds - 1], + math_ops.maximum(p[1:], 0), + name='recall_relative_ratio'), array_ops.ones_like(p[1:])) return math_ops.reduce_sum( _safe_div( prec_slope * (dtp + intercept * 
math_ops.log(safe_p_ratio)), - tp[1:] + fn[1:], + math_ops.maximum(tp[1:] + fn[1:], 0), name='pr_auc_increment'), name='interpolate_pr_auc') @@ -1068,7 +1074,8 @@ def mean_per_class_accuracy(labels, update_count_op = state_ops.scatter_add(count, labels, is_correct) def compute_mean_accuracy(_, count, total): - per_class_accuracy = _safe_div(count, total, None) + per_class_accuracy = _safe_div( + count, math_ops.maximum(total, 0), name=None) mean_accuracy_v = math_ops.reduce_mean( per_class_accuracy, name='mean_accuracy') return mean_accuracy_v @@ -1076,7 +1083,9 @@ def mean_per_class_accuracy(labels, mean_accuracy_v = _aggregate_across_towers( metrics_collections, compute_mean_accuracy, count, total) - update_op = _safe_div(update_count_op, update_total_op, name='update_op') + update_op = _safe_div(update_count_op, + math_ops.maximum(update_total_op, 0), + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -1385,12 +1394,15 @@ def mean_tensor(values, with ops.control_dependencies([values]): update_count_op = state_ops.assign_add(count, num_values) - compute_mean = lambda _, t, c: _safe_div(t, c, 'value') + compute_mean = lambda _, t, c: _safe_div( + t, math_ops.maximum(c, 0), name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = _safe_div(update_total_op, update_count_op, 'update_op') + update_op = _safe_div(update_total_op, + math_ops.maximum(update_count_op, 0), + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) -- GitLab From 217ad9a568d85b36834090d8a7a17fffeaa0ec89 Mon Sep 17 00:00:00 2001 From: Raghuraman Krishnamoorthi Date: Wed, 10 Oct 2018 11:15:27 -0700 Subject: [PATCH 212/411] Support for shared weights in quantization rewriter. 
PiperOrigin-RevId: 216561137 --- .../quantize/python/fold_batch_norms.py | 58 ++- .../quantize/python/fold_batch_norms_test.py | 400 ++++++++++++------ .../contrib/quantize/python/quantize.py | 157 +++++-- .../quantize/python/quantize_graph_test.py | 109 +++++ 4 files changed, 538 insertions(+), 186 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 7575b1b6cd..e0c6da00d8 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -145,7 +145,7 @@ def _FindFusedBatchNorms(graph): Args: graph: Graph to inspect. - Yields: + Returns: _FusedBatchNormMatches. """ input_pattern = graph_matcher.OpTypePattern('*') @@ -169,8 +169,15 @@ def _FindFusedBatchNorms(graph): graph_matcher.OpTypePattern('*'), graph_matcher.OpTypePattern('*') ]) + # Identity between conv/matmul and bn + layer_pattern_with_identity = graph_matcher.OpTypePattern( + 'Identity', + inputs=[ + graph_matcher.OneofPattern([batch_to_space_pattern, layer_pattern]) + ]) layer_output_pattern = graph_matcher.OneofPattern( - [layer_pattern, batch_to_space_pattern]) + [layer_pattern_with_identity, layer_pattern, batch_to_space_pattern]) + # MatMul has a Reshape between it and FusedBatchNorm. 
matmul_reshape_pattern = graph_matcher.OpTypePattern( 'Reshape', @@ -188,6 +195,11 @@ def _FindFusedBatchNorms(graph): 'Reshape', inputs=[batch_norm_pattern, graph_matcher.OpTypePattern('*')]) + batch_norm_identity_pattern = graph_matcher.OpTypePattern( + 'Identity', inputs=[batch_norm_pattern, matmul_bn_output_reshape_pattern]) + + bn_identity_matcher = graph_matcher.GraphMatcher(batch_norm_identity_pattern) + bn_matcher = graph_matcher.GraphMatcher( graph_matcher.OneofPattern( [matmul_bn_output_reshape_pattern, batch_norm_pattern])) @@ -200,7 +212,17 @@ def _FindFusedBatchNorms(graph): moving_avg_mul_matcher = graph_matcher.GraphMatcher( moving_average_mul_pattern) - for match_result in bn_matcher.match_graph(graph): + def _GetLayerMatch(match_result): + """Populates a layer match object containing ops/tensors for folding BNs. + + Args: + match_result: Matched result from graph matcher + + Returns: + layer_op: Matching conv/fc op prior to batch norm + BatchNormMatch: _BatchNormMatch containing all required batch norm + parameters. + """ moving_mean_tensor = None moving_variance_tensor = None bn_decay_mean_tensor = None @@ -208,7 +230,11 @@ def _FindFusedBatchNorms(graph): batch_to_space_op = None layer_op = match_result.get_op(layer_pattern) layer_tensor = match_result.get_tensor(layer_pattern) + bn_id_op = match_result.get_op(batch_norm_identity_pattern) bn_op = match_result.get_op(batch_norm_pattern) + if bn_id_op is None: + bn_id_op = bn_op + batch_epsilon = bn_op.get_attr('epsilon') # In the MatMul case, the output of batch norm is reshaped back into a @@ -219,13 +245,13 @@ def _FindFusedBatchNorms(graph): # If the matcher didn't match matmul_bn_output_reshape, there will be # another match for this 'MatMul' later, so we can skip this one. if output_reshape_op is None: - continue + return None, None output_tensor = output_reshape_op.outputs[0] # Ensure that the output tensor has consumers, otherwise this is a dangling # node and not a match. 
if not output_tensor.consumers(): - continue + return None, None batch_to_space_op = match_result.get_op(batch_to_space_pattern) input_tensor = match_result.get_tensor(input_pattern) @@ -277,7 +303,7 @@ def _FindFusedBatchNorms(graph): mean_tensor = match_result.get_tensor(mean_pattern) variance_tensor = match_result.get_tensor(variance_pattern) - yield _BatchNormMatch( + return layer_op, _BatchNormMatch( layer_op=layer_op, bn_op=bn_op, output_tensor=output_tensor, @@ -294,6 +320,26 @@ def _FindFusedBatchNorms(graph): batch_epsilon=batch_epsilon, batch_to_space_op=batch_to_space_op) + layer_matches = [] + # We use matched_layer_set to ensure that layers aren't matched multiple + # times. + matched_layer_set = set() + for match_result in bn_identity_matcher.match_graph(graph): + layer_op, layer_match = _GetLayerMatch(match_result) + if layer_op is not None: + if layer_op not in matched_layer_set: + matched_layer_set.add(layer_op) + layer_matches.append(layer_match) + + for match_result in bn_matcher.match_graph(graph): + layer_op, layer_match = _GetLayerMatch(match_result) + if layer_op is not None: + if layer_op not in matched_layer_set: + matched_layer_set.add(layer_op) + layer_matches.append(layer_match) + + return layer_matches + def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay): """Computes batch norm correction params. 
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py index 3f8063cc02..77b3f62e9d 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py @@ -48,26 +48,32 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def _RunTestOverParameters(self, test_fn): parameters_list = [ # (relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm, - # freeze_batch_norm_delay) - (nn_ops.relu6, 'Relu6', False, False, False, 100), - (nn_ops.relu, 'Relu', False, False, False, None), - (nn_ops.relu6, 'Relu6', True, False, False, 100), - (nn_ops.relu, 'Relu', True, False, False, None), - (nn_ops.relu6, 'Relu6', False, True, False, 100), - (nn_ops.relu, 'Relu', False, True, False, None), - (nn_ops.relu6, 'Relu6', True, True, False, 100), - (nn_ops.relu, 'Relu', True, True, False, None), + # freeze_batch_norm_delay, insert identity node) + (nn_ops.relu6, 'Relu6', False, False, False, 100, False), + (nn_ops.relu, 'Relu', False, False, False, None, False), + (nn_ops.relu6, 'Relu6', True, False, False, 100, False), + (nn_ops.relu, 'Relu', True, False, False, None, False), + (nn_ops.relu6, 'Relu6', False, True, False, 100, False), + (nn_ops.relu, 'Relu', False, True, False, None, False), + (nn_ops.relu6, 'Relu6', True, True, False, 100, False), + (nn_ops.relu, 'Relu', True, True, False, None, False), # Fused batch norm always has scaling enabled. 
- (nn_ops.relu6, 'Relu6', False, True, True, None), - (nn_ops.relu, 'Relu', False, True, True, 100), - (nn_ops.relu6, 'Relu6', True, True, True, None), - (nn_ops.relu, 'Relu', True, True, True, 100), + (nn_ops.relu6, 'Relu6', False, True, True, None, False), + (nn_ops.relu, 'Relu', False, True, True, 100, False), + (nn_ops.relu6, 'Relu6', True, True, True, None, False), + (nn_ops.relu, 'Relu', True, True, True, 100, False), + (nn_ops.relu6, 'Relu6', False, True, True, None, True), + (nn_ops.relu, 'Relu', False, True, True, 100, True), + (nn_ops.relu6, 'Relu6', True, True, True, None, True), + (nn_ops.relu, 'Relu', True, True, True, 100, True), ] for params in parameters_list: - test_fn(params[0], params[1], params[2], params[3], params[4], params[5]) + test_fn(params[0], params[1], params[2], params[3], params[4], params[5], + params[6]) def _TestFoldConv2d(self, relu, relu_op_name, with_bypass, has_scaling, - fused_batch_norm, freeze_batch_norm_delay): + fused_batch_norm, freeze_batch_norm_delay, + insert_identity_node): """Tests folding cases: inputs -> Conv2d with batch norm -> Relu*. Args: @@ -79,6 +85,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. 
freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -87,18 +95,42 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): out_depth = 3 if with_bypass else 32 stride = 1 if with_bypass else 2 activation_fn = None if with_bypass else relu - scope = 'test/test2' if with_bypass else 'test' - node = conv2d( - inputs, - out_depth, [5, 5], - stride=stride, - padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=activation_fn, - normalizer_fn=batch_norm, - normalizer_params=self._BatchNormParams( - scale=has_scaling, fused=fused_batch_norm), - scope=scope) + name = 'test/test2' if with_bypass else 'test' + if insert_identity_node: + with g.name_scope(name): + node = conv2d( + inputs, + out_depth, [5, 5], + stride=stride, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + conv_out = array_ops.identity(node, name='conv_out') + + node = batch_norm( + conv_out, + center=True, + scale=has_scaling, + decay=1.0 - 0.003, + fused=fused_batch_norm) + if activation_fn is not None: + node = activation_fn(node) + conv_name = name + '/Conv' + else: + node = conv2d( + inputs, + out_depth, [5, 5], + stride=stride, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=name) + conv_name = name if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) @@ -106,31 +138,30 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms( g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay) - folded_mul = g.get_operation_by_name(scope + 
'/mul_fold') + folded_mul = g.get_operation_by_name(conv_name + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') self._AssertInputOpsAre(folded_mul, [ - scope + '/correction_mult', - self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm) + conv_name + '/correction_mult', + self._BatchNormMultiplierName(conv_name, has_scaling, fused_batch_norm) ]) - self._AssertOutputGoesToOps(folded_mul, g, [scope + '/Conv2D_Fold']) + self._AssertOutputGoesToOps(folded_mul, g, [conv_name + '/Conv2D_Fold']) - folded_conv = g.get_operation_by_name(scope + '/Conv2D_Fold') + folded_conv = g.get_operation_by_name(conv_name + '/Conv2D_Fold') self.assertEqual(folded_conv.type, 'Conv2D') self._AssertInputOpsAre(folded_conv, - [scope + '/mul_fold', inputs.op.name]) - self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul']) + [conv_name + '/mul_fold', inputs.op.name]) + self._AssertOutputGoesToOps(folded_conv, g, [conv_name + '/post_conv_mul']) - folded_add = g.get_operation_by_name(scope + '/add_fold') + folded_add = g.get_operation_by_name(conv_name + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre(folded_add, [ - scope + '/correction_add', - self._BathNormBiasName(scope, fused_batch_norm) + conv_name + '/correction_add', + self._BathNormBiasName(conv_name, fused_batch_norm) ]) output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) if freeze_batch_norm_delay is not None: - self._AssertMovingAveragesAreFrozen(g, scope) - + self._AssertMovingAveragesAreFrozen(g, name) for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) @@ -143,7 +174,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): relu_op_name='Relu', has_scaling=True, fused_batch_norm=False, - freeze_batch_norm_delay=None): + freeze_batch_norm_delay=None, + insert_identity_node=False): """Tests folding cases for a network with multiple 
layers. Args: @@ -153,6 +185,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -225,9 +259,14 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) - def _TestFoldConv2dUnknownShape(self, relu, relu_op_name, with_bypass, - has_scaling, fused_batch_norm, - freeze_batch_norm_delay): + def _TestFoldConv2dUnknownShape(self, + relu, + relu_op_name, + with_bypass, + has_scaling, + fused_batch_norm, + freeze_batch_norm_delay, + insert_identity_node=False): """Tests folding cases: inputs -> Conv2d with batch norm -> Relu*. Tests that folding works even with an input shape where some dimensions are @@ -242,6 +281,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -298,9 +339,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def testFoldConv2dUnknownShape(self): self._RunTestOverParameters(self._TestFoldConv2dUnknownShape) - def _TestFoldFullyConnectedLayer(self, relu, relu_op_name, with_bypass, - has_scaling, fused_batch_norm, - freeze_batch_norm_delay): + def _TestFoldFullyConnectedLayer( + self, relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm, + freeze_batch_norm_delay, insert_identity_node): """Tests folding cases: inputs -> FC with batch norm -> Relu*. 
Args: @@ -312,6 +353,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -319,16 +362,40 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): inputs = array_ops.zeros((batch_size, depth)) out_depth = 256 if with_bypass else 128 activation_fn = None if with_bypass else relu - scope = 'test/test2' if with_bypass else 'test' - node = fully_connected( - inputs, - out_depth, - weights_initializer=self._WeightInit(0.03), - activation_fn=activation_fn, - normalizer_fn=batch_norm, - normalizer_params=self._BatchNormParams( - scale=has_scaling, fused=fused_batch_norm), - scope=scope) + name = 'test/test2' if with_bypass else 'test' + insert_identity_node = fused_batch_norm + if insert_identity_node: + with g.name_scope(name): + node = fully_connected( + inputs, + out_depth, + weights_initializer=self._WeightInit(0.03), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + node = array_ops.identity(node, name='fc_out') + + node = batch_norm( + node, + center=True, + scale=has_scaling, + decay=1.0 - 0.003, + fused=fused_batch_norm) + if activation_fn is not None: + node = activation_fn(node) + fc_name = name + '/fully_connected' + else: + + node = fully_connected( + inputs, + out_depth, + weights_initializer=self._WeightInit(0.03), + activation_fn=activation_fn, + normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=name) + fc_name = name if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) @@ -336,30 +403,30 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms( g, is_training=True, 
freeze_batch_norm_delay=freeze_batch_norm_delay) - folded_mul = g.get_operation_by_name(scope + '/mul_fold') + folded_mul = g.get_operation_by_name(fc_name + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') self._AssertInputOpsAre(folded_mul, [ - scope + '/correction_mult', - self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm) + fc_name + '/correction_mult', + self._BatchNormMultiplierName(fc_name, has_scaling, fused_batch_norm) ]) - self._AssertOutputGoesToOps(folded_mul, g, [scope + '/MatMul_Fold']) + self._AssertOutputGoesToOps(folded_mul, g, [fc_name + '/MatMul_Fold']) - folded_conv = g.get_operation_by_name(scope + '/MatMul_Fold') + folded_conv = g.get_operation_by_name(fc_name + '/MatMul_Fold') self.assertEqual(folded_conv.type, 'MatMul') self._AssertInputOpsAre(folded_conv, - [scope + '/mul_fold', inputs.op.name]) - self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul']) + [fc_name + '/mul_fold', inputs.op.name]) + self._AssertOutputGoesToOps(folded_conv, g, [fc_name + '/post_conv_mul']) - folded_add = g.get_operation_by_name(scope + '/add_fold') + folded_add = g.get_operation_by_name(fc_name + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre(folded_add, [ - scope + '/correction_add', - self._BathNormBiasName(scope, fused_batch_norm) + fc_name + '/correction_add', + self._BathNormBiasName(fc_name, fused_batch_norm) ]) output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) if freeze_batch_norm_delay is not None: - self._AssertMovingAveragesAreFrozen(g, scope) + self._AssertMovingAveragesAreFrozen(g, name) for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) @@ -369,7 +436,7 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def _TestFoldDepthwiseConv2d(self, relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm, - freeze_batch_norm_delay): + 
freeze_batch_norm_delay, insert_identity_node): """Tests folding: inputs -> DepthwiseConv2d with batch norm -> Relu*. Args: @@ -380,7 +447,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): has_scaling: Bool, when true the batch norm has scaling. fused_batch_norm: Bool, when true the batch norm is fused. freeze_batch_norm_delay: None or the number of steps after which training - switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm switches to using frozen mean and variance """ g = ops.Graph() with g.as_default(): @@ -388,19 +456,44 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): inputs = array_ops.zeros((batch_size, height, width, 3)) stride = 1 if with_bypass else 2 activation_fn = None if with_bypass else relu - scope = 'test/test2' if with_bypass else 'test' - node = separable_conv2d( - inputs, - None, [5, 5], - stride=stride, - depth_multiplier=1.0, - padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=activation_fn, - normalizer_fn=batch_norm, - normalizer_params=self._BatchNormParams( - scale=has_scaling, fused=fused_batch_norm), - scope=scope) + name = 'test/test2' if with_bypass else 'test' + if insert_identity_node: + with g.name_scope(name): + node = separable_conv2d( + inputs, + None, [5, 5], + stride=stride, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + node = array_ops.identity(node, name='sep_conv_out') + + node = batch_norm( + node, + center=True, + scale=has_scaling, + decay=1.0 - 0.003, + fused=fused_batch_norm) + if activation_fn is not None: + node = activation_fn(node) + sep_conv_name = name + '/SeparableConv2d' + else: + node = separable_conv2d( + inputs, + None, [5, 5], + stride=stride, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + 
normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=name) + sep_conv_name = name if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) @@ -408,40 +501,43 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms( g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay) - folded_mul = g.get_operation_by_name(scope + '/mul_fold') + folded_mul = g.get_operation_by_name(sep_conv_name + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') if fused_batch_norm: - scale_reshape_op_name = scope + '/BatchNorm_Fold/scale_reshape' + scale_reshape_op_name = sep_conv_name + '/BatchNorm_Fold/scale_reshape' else: - scale_reshape_op_name = scope + '/scale_reshape' - self._AssertInputOpsAre(folded_mul, - [scope + '/correction_mult', scale_reshape_op_name]) - self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold']) + scale_reshape_op_name = sep_conv_name + '/scale_reshape' + self._AssertInputOpsAre( + folded_mul, [sep_conv_name + '/correction_mult', scale_reshape_op_name]) + self._AssertOutputGoesToOps(folded_mul, g, + [sep_conv_name + '/depthwise_Fold']) scale_reshape = g.get_operation_by_name(scale_reshape_op_name) self.assertEqual(scale_reshape.type, 'Reshape') self._AssertInputOpsAre(scale_reshape, [ - self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm), + self._BatchNormMultiplierName(sep_conv_name, has_scaling, + fused_batch_norm), scale_reshape_op_name + '/shape' ]) - self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold']) + self._AssertOutputGoesToOps(scale_reshape, g, [sep_conv_name + '/mul_fold']) - folded_conv = g.get_operation_by_name(scope + '/depthwise_Fold') + folded_conv = g.get_operation_by_name(sep_conv_name + '/depthwise_Fold') self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative') self._AssertInputOpsAre(folded_conv, - [scope + '/mul_fold', 
inputs.op.name]) - self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul']) + [sep_conv_name + '/mul_fold', inputs.op.name]) + self._AssertOutputGoesToOps(folded_conv, g, + [sep_conv_name + '/post_conv_mul']) - folded_add = g.get_operation_by_name(scope + '/add_fold') + folded_add = g.get_operation_by_name(sep_conv_name + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre(folded_add, [ - scope + '/correction_add', - self._BathNormBiasName(scope, fused_batch_norm) + sep_conv_name + '/correction_add', + self._BathNormBiasName(sep_conv_name, fused_batch_norm) ]) output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) if freeze_batch_norm_delay is not None: - self._AssertMovingAveragesAreFrozen(g, scope) + self._AssertMovingAveragesAreFrozen(g, name) for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) @@ -450,7 +546,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): self._RunTestOverParameters(self._TestFoldDepthwiseConv2d) def _TestFoldAtrousConv2d(self, relu, relu_op_name, with_bypass, has_scaling, - fused_batch_norm, freeze_batch_norm_delay): + fused_batch_norm, freeze_batch_norm_delay, + insert_identity_node): """Tests folding: inputs -> AtrousConv2d with batch norm -> Relu*. Args: @@ -461,7 +558,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): has_scaling: Bool, when true the batch norm has scaling. fused_batch_norm: Bool, when true the batch norm is fused. 
freeze_batch_norm_delay: None or the number of steps after which training - switches to using frozen mean and variance + switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -469,19 +568,44 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): inputs = array_ops.zeros((batch_size, height, width, 3)) dilation_rate = 2 activation_fn = None if with_bypass else relu - scope = 'test/test2' if with_bypass else 'test' - node = separable_conv2d( - inputs, - None, [3, 3], - rate=dilation_rate, - depth_multiplier=1.0, - padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=activation_fn, - normalizer_fn=batch_norm, - normalizer_params=self._BatchNormParams( - scale=has_scaling, fused=fused_batch_norm), - scope=scope) + name = 'test/test2' if with_bypass else 'test' + if insert_identity_node: + with g.name_scope(name): + node = separable_conv2d( + inputs, + None, [3, 3], + rate=dilation_rate, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + node = array_ops.identity(node, name='sep_conv_out') + + node = batch_norm( + node, + center=True, + scale=has_scaling, + decay=1.0 - 0.003, + fused=fused_batch_norm) + if activation_fn is not None: + node = activation_fn(node) + sep_conv_name = name + '/SeparableConv2d' + else: + node = separable_conv2d( + inputs, + None, [3, 3], + rate=dilation_rate, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=name) + sep_conv_name = name if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) @@ -489,45 +613,48 @@ class 
FoldBatchNormsTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms( g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay) - folded_mul = g.get_operation_by_name(scope + '/mul_fold') + folded_mul = g.get_operation_by_name(sep_conv_name + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') if fused_batch_norm: - scale_reshape_op_name = scope + '/BatchNorm_Fold/scale_reshape' + scale_reshape_op_name = sep_conv_name + '/BatchNorm_Fold/scale_reshape' else: - scale_reshape_op_name = scope + '/scale_reshape' - self._AssertInputOpsAre(folded_mul, - [scope + '/correction_mult', scale_reshape_op_name]) - self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold']) + scale_reshape_op_name = sep_conv_name + '/scale_reshape' + self._AssertInputOpsAre( + folded_mul, [sep_conv_name + '/correction_mult', scale_reshape_op_name]) + self._AssertOutputGoesToOps(folded_mul, g, + [sep_conv_name + '/depthwise_Fold']) scale_reshape = g.get_operation_by_name(scale_reshape_op_name) self.assertEqual(scale_reshape.type, 'Reshape') self._AssertInputOpsAre(scale_reshape, [ - self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm), + self._BatchNormMultiplierName(sep_conv_name, has_scaling, + fused_batch_norm), scale_reshape_op_name + '/shape' ]) - self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold']) + self._AssertOutputGoesToOps(scale_reshape, g, [sep_conv_name + '/mul_fold']) - folded_conv = g.get_operation_by_name(scope + '/depthwise_Fold') + folded_conv = g.get_operation_by_name(sep_conv_name + '/depthwise_Fold') self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative') - self._AssertInputOpsAre( - folded_conv, [scope + '/mul_fold', scope + '/depthwise/SpaceToBatchND']) + self._AssertInputOpsAre(folded_conv, [ + sep_conv_name + '/mul_fold', sep_conv_name + '/depthwise/SpaceToBatchND' + ]) if fused_batch_norm: self._AssertOutputGoesToOps(folded_conv, g, - [scope + '/BatchToSpaceND_Fold']) + [sep_conv_name + 
'/BatchToSpaceND_Fold']) else: - self._AssertOutputGoesToOps(folded_conv, g, - [scope + '/depthwise/BatchToSpaceND_Fold']) + self._AssertOutputGoesToOps( + folded_conv, g, [sep_conv_name + '/depthwise/BatchToSpaceND_Fold']) - folded_add = g.get_operation_by_name(scope + '/add_fold') + folded_add = g.get_operation_by_name(sep_conv_name + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre(folded_add, [ - scope + '/correction_add', - self._BathNormBiasName(scope, fused_batch_norm) + sep_conv_name + '/correction_add', + self._BathNormBiasName(sep_conv_name, fused_batch_norm) ]) output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) if freeze_batch_norm_delay is not None: - self._AssertMovingAveragesAreFrozen(g, scope) + self._AssertMovingAveragesAreFrozen(g, name) for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) @@ -535,9 +662,14 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def testFoldAtrousConv2d(self): self._RunTestOverParameters(self._TestFoldAtrousConv2d) - def _TestCompareFoldAndUnfolded(self, relu, relu_op_name, with_bypass, - has_scaling, fused_batch_norm, - freeze_batch_norm_delay): + def _TestCompareFoldAndUnfolded(self, + relu, + relu_op_name, + with_bypass, + has_scaling, + fused_batch_norm, + freeze_batch_norm_delay, + insert_identity_node=False): """Tests that running folded and unfolded BN returns the same results. Args: @@ -549,6 +681,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. 
freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ random_seed.set_random_seed(1234) unfolded_g = ops.Graph() diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 5e63d33db8..fd86a96905 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -32,7 +32,9 @@ from tensorflow.python.platform import tf_logging as logging _QUANTIZABLE_TYPES = {'Conv2D', 'MatMul', 'DepthwiseConv2dNative'} # Activations that are supported by the quantization rewrite. -_ACTIVATION_TYPES = {'Relu', 'Relu6'} +_ACTIVATION_TYPES = {'Relu', 'Relu6', 'Identity'} + +_RELU_TYPES = {'Relu', 'Relu6'} def Quantize(graph, @@ -172,7 +174,7 @@ def Quantize(graph, # Add at inference time. consumers = input_to_ops_map.ConsumerOperations( layer_match.post_activation_bypass_op) - if any([consumer.type in _ACTIVATION_TYPES for consumer in consumers]): + if any([consumer.type in _RELU_TYPES for consumer in consumers]): logging.info('Skipping %s, because its followed by an activation.', layer_match.post_activation_bypass_op.name) else: @@ -384,10 +386,11 @@ def _FindLayersToQuantize(graph): bias_add_op = match_result.get_op(folded_bias_add_pattern) bypass_op = match_result.get_op(bypass_pattern) if layer_op not in matched_layer_set: - matched_layer_set.add(layer_op) - layer_matches.append( - _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, None, - bias_add_op)) + if not _IsSkipLayer(activation_op): + matched_layer_set.add(layer_op) + layer_matches.append( + _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, None, + bias_add_op)) # Match the final layer, where there may not be an activation and instead # the output of the final BiasAdd must be quantized. 
So we treat the BiasAdd @@ -424,6 +427,32 @@ def _FindLayersToQuantize(graph): return layer_matches +def _IsSkipLayer(activation_op): + """Skip quantizing conv->identity->Batch norm layers. + + Args: + activation_op: Activation op detected by layer matching pattern + + Returns: + skip_layer: boolean, true when conv->identity->batch norm is detected. + """ + + # Exclude quantization of conv->identity->BN, + # After folding, this part corresponds to estimation of mean and variance + # and should not be quantized. + skip_layer = False + if activation_op.type == 'Identity' and len(activation_op.outputs) == 1: + if len(activation_op.outputs[0].consumers()) == 1: + consumer = activation_op.outputs[0].consumers()[0] + if consumer.type == 'FusedBatchNorm': + skip_layer = True + logging.info( + 'Skipping quantizing %s, because it is the output of a conv/fc' + 'followed by a identity, feeding a fused batch norm.', + activation_op.name) + return skip_layer + + class _LayerMatch(object): """Contains all information related to a matched Layer.""" @@ -461,8 +490,8 @@ class _LayerMatch(object): return self._bias_add_op -def _FollowedByFakeQuant(tensor): - """Returns True if the tensor is followed by a FakeQuant.""" +def _GetFollowingFakeQuantOp(tensor): + """Returns the following FakeQuant op if it exists else None.""" fake_quant_ops = set([ 'FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxArgs', 'FakeQuantWithMinMaxVarsPerChannel' @@ -472,11 +501,11 @@ def _FollowedByFakeQuant(tensor): while consumers: c = consumers.pop() if c.type in fake_quant_ops: - return True + return c elif c.type in pass_through_ops: for output in c.outputs: consumers.extend(output.consumers()) - return False + return None def _InsertQuantOp(context, @@ -559,44 +588,78 @@ def _InsertQuantOp(context, # Prevent ops from being quantized multiple times. Bypass ops can sometimes # overlap between multiple matches, so we need to ensure that we don't # add duplicate FakeQuant operations. 
- if _FollowedByFakeQuant(inputs): - return - - if moving_avg: - quant = ( - quant_ops.MovingAvgQuantize( - inputs, - init_min=init_min, - init_max=init_max, - ema_decay=ema_decay, - is_training=is_training, - num_bits=bits, - narrow_range=narrow_range, - vars_collection=vars_collection, - name_prefix=name_prefix)) + fake_quant_op = _GetFollowingFakeQuantOp(inputs) + + # If we find that we are attempting to insert a fake quant op following + # a fake quant, we skip inserting a fake quant op + + if fake_quant_op is None: + if moving_avg: + quant = ( + quant_ops.MovingAvgQuantize( + inputs, + init_min=init_min, + init_max=init_max, + ema_decay=ema_decay, + is_training=is_training, + num_bits=bits, + narrow_range=narrow_range, + vars_collection=vars_collection, + name_prefix=name_prefix)) + else: + quant = ( + quant_ops.LastValueQuantize( + inputs, + init_min=init_min, + init_max=init_max, + is_training=is_training, + num_bits=bits, + narrow_range=narrow_range, + vars_collection=vars_collection, + name_prefix=name_prefix)) + + if quant_delay and quant_delay > 0: + activate_quant = math_ops.greater_equal( + common.CreateOrGetQuantizationStep(), + quant_delay, + name=name_prefix + '/activate_quant') + quant = control_flow_ops.cond( + activate_quant, + lambda: quant, + lambda: inputs, + name=name_prefix + '/delayed_quant') else: - quant = ( - quant_ops.LastValueQuantize( - inputs, - init_min=init_min, - init_max=init_max, - is_training=is_training, - num_bits=bits, - narrow_range=narrow_range, - vars_collection=vars_collection, - name_prefix=name_prefix)) - - if quant_delay and quant_delay > 0: - activate_quant = math_ops.greater_equal( - common.CreateOrGetQuantizationStep(), - quant_delay, - name=name_prefix + '/activate_quant') - quant = control_flow_ops.cond( - activate_quant, - lambda: quant, - lambda: inputs, - name=name_prefix + '/delayed_quant') - + # return + # If a fake quant op is present already, make sure that + # any downstream use of the tensor reroutes to 
the appropriate quantized + # tensor. If there is no quant_delay, this is simply the output of the + # fake quant op. If there is a quant delay, we reroute to the output + # of the delayed quant operation, which inserts quantization only after + # a specified quant_delay + + quant = fake_quant_op.outputs[0] + if quant_delay and quant_delay > 0: + name_prefix = '/'.join(quant.name.split('/')[:-1]) + quant = quant.graph.get_tensor_by_name(name_prefix + + '/delayed_quant/Merge:0') + pruned_consumer_set = set() + for consumer in consumers: + fake_quant_dest_op = _GetFollowingFakeQuantOp(consumer.outputs[0]) + if (fake_quant_dest_op is None or + fake_quant_dest_op.name != fake_quant_op.name): + pruned_consumer_set.add(consumer) + consumers = pruned_consumer_set + + # If we have + # input->pass_through->fake_quant + # there is nothing to reroute. + # + # If we have + # input-> pass_through->fake_quant + # |-> consumer + # Then we reroute such that: + # input-> pass_through->fake_quant + # |-> consumer if consumers: tensors_modified_count = common.RerouteTensor( quant, inputs, can_modify=consumers) diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py index e80d2183a6..f0fd0949dd 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph_test.py +++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import template from tensorflow.python.ops import nn_ops from tensorflow.python.platform import googletest @@ -267,6 +268,27 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): graph_def_after = str(g.as_graph_def()) self.assertEqual(graph_def_before, graph_def_after) + def testIdentityNode(self): + self._RunTestOverAllRewrites(self._TestIdentityNode) + + def 
_TestIdentityNode(self, rewrite_fn): + graph = ops.Graph() + with graph.as_default(): + self._LayerWithIdentity() + + rewrite_fn(graph) + op_names = [op.name for op in graph.get_operations()] + self.assertTrue(any('test/Conv/weights_quant' in name for name in op_names)) + self.assertTrue(any('test/Conv/act_quant' in name for name in op_names)) + bn_out_identity = graph.get_operation_by_name('test/bn_out') + self._AssertInputOpsAre(bn_out_identity, [ + 'test/Conv/add_fold', + ]) + + conv_out_identity = graph.get_operation_by_name('test/conv_out') + self._AssertOutputGoesToOps(conv_out_identity, graph, + ['test/BatchNorm/FusedBatchNorm']) + def testRewriteWithScope(self): self._RunTestOverExperimentalRewritesWithScope( self._TestRewriteWithScope, 'scope1') @@ -306,6 +328,42 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): # No ops should be inserted or removed. self.assertEqual(op_names_before_rewrite, op_names_after_rewrite) + def testWithSharedWeights(self): + + self._RunTestOverAllRewrites(self._TestWithSharedWeights) + self._RunTestOverTrainingRewrites(self._TestRewriteWithSharedWeights) + + def _TestRewriteWithSharedWeights(self, rewrite_fn, quant_delay=1): + self._TestWithSharedWeights(rewrite_fn, quant_delay) + + def _TestWithSharedWeights(self, rewrite_fn, quant_delay=None): + with ops.Graph().as_default() as g: + conv = template.make_template('shared_weights_conv', self._ConvLayer) + conv() + conv() + if quant_delay is None: + rewrite_fn() + else: + rewrite_fn(quant_delay=quant_delay) + + conv_ops = [op for op in g.get_operations() if op.type == 'Conv2D'] + weights_quants = [ + op for op in g.get_operations() + if 'weights_quant' in op.name and op.type == 'FakeQuantWithMinMaxVars' + ] + # Check that the shared weights variable is not quantized multiple times + self.assertTrue(len(weights_quants) == 1) + weights_quant_tensor = weights_quants[0].outputs[0] + if quant_delay: + delayed_weights_quants = [ + op for op in g.get_operations() + if 
'weights_quant' in op.name and op.type == 'Merge' + ] + self.assertTrue(len(delayed_weights_quants) == 1) + weights_quant_tensor = delayed_weights_quants[0].outputs[0] + # Check that the Conv2D operations get the quantized weights + self.assertTrue(all(weights_quant_tensor in op.inputs for op in conv_ops)) + def _ConvLayer( self, input_tensor=None, scope='test', pre_activation_bypass=False, post_activation_bypass=False): @@ -328,6 +386,57 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): output += input_tensor return output + def _LayerWithIdentity(self, + input_tensor=None, + scope='test', + post_activation_bypass=False): + """Add a basic conv, identity, batch norm with skip to the default graph.""" + batch_size, height, width, depth = 5, 128, 128, 3 + if input_tensor is None: + input_tensor = array_ops.zeros((batch_size, height, width, depth)) + weight_init = init_ops.truncated_normal_initializer + with ops.name_scope(scope): + output = layers.conv2d( + input_tensor, + depth, [5, 5], + padding='SAME', + weights_initializer=weight_init(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + output = array_ops.identity(output, name='conv_out') + + output = layers.batch_norm( + output, center=True, scale=True, decay=1.0 - 0.003, fused=True) + + output = array_ops.identity(output, name='bn_out') + if post_activation_bypass: + output += input_tensor + return output + + def _AssertInputOpsAre(self, op, in_op_names): + """Asserts that all inputs to op come from in_op_names (disregarding order). + + Args: + op: Operation to check inputs for. + in_op_names: List of strings, operations where all op's inputs should come + from. + """ + expected_inputs = [in_op_name + ':0' for in_op_name in in_op_names] + self.assertItemsEqual([t.name for t in op.inputs], expected_inputs) + + def _AssertOutputGoesToOps(self, op, graph, out_op_names): + """Asserts that outputs from op go to out_op_names (and perhaps others). 
+ + Args: + op: Operation to check outputs for. + graph: Graph where output operations are located. + out_op_names: List of strings, operations where op's outputs should go. + """ + for out_op_name in out_op_names: + out_op = graph.get_operation_by_name(out_op_name) + self.assertIn(op.outputs[0].name, [str(t.name) for t in out_op.inputs]) + if __name__ == '__main__': googletest.main() -- GitLab From 881c11a0771c25875453deaa5937cb681675b4d5 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 10 Oct 2018 11:20:21 -0700 Subject: [PATCH 213/411] Don't decluster ops not supported by TF There are ops that don't have a TensorFlow kernel and must be run via XLA. Don't decluster these ops. If declustering these ops become important in the future we could perhaps put these on XLA_* devices or put them in a single-node cluster but YAGNI probably. PiperOrigin-RevId: 216562037 --- tensorflow/compiler/jit/BUILD | 1 + .../compiler/jit/partially_decluster_pass.cc | 22 +++++++++---- .../jit/partially_decluster_pass_test.cc | 32 +++++++++++++++++++ 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 661b444a42..64adc885bc 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -510,6 +510,7 @@ tf_cc_test( "//tensorflow/compiler/tf2xla:test_util", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla/cc:xla_jit_ops", + "//tensorflow/compiler/tf2xla/cc:xla_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", diff --git a/tensorflow/compiler/jit/partially_decluster_pass.cc b/tensorflow/compiler/jit/partially_decluster_pass.cc index b1f9e9088f..5b96103223 100644 --- a/tensorflow/compiler/jit/partially_decluster_pass.cc +++ b/tensorflow/compiler/jit/partially_decluster_pass.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/framework/memory_types.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { namespace { @@ -206,18 +207,27 @@ bool IsIntraClusterEdge(const Edge& edge) { return src_cluster_name.has_value() && src_cluster_name == dst_cluster_name; } -Status MustCompileNode(const Node* n, bool* result) { +bool IsMustCompileDevice(const DeviceType& device_type) { + const XlaOpRegistry::DeviceRegistration* registration; + if (XlaOpRegistry::GetCompilationDevice(device_type.type(), &registration)) { + return registration->requires_compilation; + } + + return false; +} + +Status MustCompileNode(const Node* n, bool* must_compile) { DeviceType device_type(""); TF_RETURN_IF_ERROR( DeviceToDeviceType(n->assigned_device_name(), &device_type)); - const XlaOpRegistry::DeviceRegistration* registration; - if (!XlaOpRegistry::GetCompilationDevice(device_type.type(), &registration)) { - *result = false; - } else { - *result = registration->requires_compilation; + if (IsMustCompileDevice(device_type)) { + *must_compile = true; + return Status::OK(); } + // We must compile `n` if it does not have a TensorFlow kernel. + *must_compile = !FindKernelDef(device_type, n->def(), nullptr, nullptr).ok(); return Status::OK(); } diff --git a/tensorflow/compiler/jit/partially_decluster_pass_test.cc b/tensorflow/compiler/jit/partially_decluster_pass_test.cc index 0feb73a89e..74d5ef5718 100644 --- a/tensorflow/compiler/jit/partially_decluster_pass_test.cc +++ b/tensorflow/compiler/jit/partially_decluster_pass_test.cc @@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" +#include "tensorflow/compiler/tf2xla/cc/ops/xla_ops.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/framework/node_def_util.h" @@ -405,5 +406,36 @@ TEST(PartiallyDeclusterPassTest, DontDeclusterXlaDeviceOps) { } } +TEST(PartiallyDeclusterPassTest, DontDeclusterNonTensorFlowOps) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output dynamic_slice_operand = + ops::Placeholder(s.WithOpName("dynamic_slice_operand"), DT_INT32, + ops::Placeholder::Attrs{}); + Output dynamic_slice_begin = ops::Placeholder( + s.WithOpName("dynamic_slice_begin"), DT_INT32, ops::Placeholder::Attrs{}); + Output dynamic_slice_size = ops::Placeholder( + s.WithOpName("dynamic_slice_size"), DT_INT32, ops::Placeholder::Attrs{}); + Output dynamic_slice = + ops::XlaDynamicSlice(s.WithOpName("dynamic_slice"), dynamic_slice_operand, + dynamic_slice_begin, dynamic_slice_size); + + Output reshape_input = ops::Placeholder(s.WithOpName("reshape_input"), + DT_FLOAT, ops::Placeholder::Attrs{}); + Output reshape = + ops::Reshape(s.WithOpName("reshape"), reshape_input, dynamic_slice); + + AddToCluster({dynamic_slice.node(), reshape.node()}, "cluster_0"); + + std::unique_ptr<Graph> graph = absl::make_unique<Graph>(OpRegistry::Global()); + TF_ASSERT_OK(s.ToGraph(graph.get())); + + Node* n = FindNodeByName(*graph, "dynamic_slice"); + ASSERT_NE(n, nullptr); + + TF_ASSERT_OK(PartiallyDecluster(&graph)); + + EXPECT_EQ(GetXlaClusterForNode(*n), "cluster_0"); +} + } // namespace } // namespace tensorflow -- GitLab From b95a4b41941b2d4b672df7ddbc30792beb7e1e14 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 11:33:09 -0700 Subject: [PATCH 214/411] Internal change. 
PiperOrigin-RevId: 216564327 --- .../kernels/bidirectional_sequence_lstm.cc | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc index a326827b1e..1137f05fa6 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc @@ -129,14 +129,14 @@ enum TemporaryTensor { kBwScratchBuffer = 1, // Quantized tensors needed for the hybrid kernel. kInputQuantized = 2, - kAuxInputQuantized = 3, // Quantized tensor needed for auxiliary input. - kFwActivationStateQuantized = 4, - kBwActivationStateQuantized = 5, - kFwCellStateQuantized = 6, - kBwCellStateQuantized = 7, - kScalingFactors = 8, - kProductScalingFactors = 9, - kRecoveredCellWeights = 10, + kFwActivationStateQuantized = 3, + kBwActivationStateQuantized = 4, + kFwCellStateQuantized = 5, + kBwCellStateQuantized = 6, + kScalingFactors = 7, + kProductScalingFactors = 8, + kRecoveredCellWeights = 9, + kAuxInputQuantized = 10, // Optional, quantized tensor for auxiliary input. kNumTemporaryTensors = 11 }; @@ -469,7 +469,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArrayFree(node->temporaries); if (is_hybrid_op) { - node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors); + node->temporaries = TfLiteIntArrayCreate( + has_aux_input ? kNumTemporaryTensors : kNumTemporaryTensors - 1); } else { node->temporaries = TfLiteIntArrayCreate(2); // the two scratch buffers. 
} @@ -570,22 +571,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { input_quantized_size)); } - if (has_aux_input) { - node->temporaries->data[kAuxInputQuantized] = - *scratch_tensor_index + kAuxInputQuantized; - TfLiteTensor* aux_input_quantized = - GetTemporary(context, node, kAuxInputQuantized); - aux_input_quantized->type = kTfLiteUInt8; - aux_input_quantized->allocation_type = kTfLiteArenaRw; - if (!TfLiteIntArrayEqual(aux_input_quantized->dims, aux_input->dims)) { - TfLiteIntArray* aux_input_quantized_size = - TfLiteIntArrayCopy(aux_input->dims); - TF_LITE_ENSURE_OK(context, - context->ResizeTensor(context, aux_input_quantized, - aux_input_quantized_size)); - } - } - node->temporaries->data[kFwActivationStateQuantized] = *scratch_tensor_index + kFwActivationStateQuantized; TfLiteTensor* fw_activation_state_quantized = @@ -691,6 +676,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { context->ResizeTensor(context, recovered_cell_weights, recovered_cell_weights_size)); } + + // Only allocate a temporary tensor for quantized auxiliary input if we are + // actually going to use it. 
+ if (has_aux_input) { + node->temporaries->data[kAuxInputQuantized] = + *scratch_tensor_index + kAuxInputQuantized; + TfLiteTensor* aux_input_quantized = + GetTemporary(context, node, kAuxInputQuantized); + aux_input_quantized->type = kTfLiteUInt8; + aux_input_quantized->allocation_type = kTfLiteArenaRw; + if (!TfLiteIntArrayEqual(aux_input_quantized->dims, aux_input->dims)) { + TfLiteIntArray* aux_input_quantized_size = + TfLiteIntArrayCopy(aux_input->dims); + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, aux_input_quantized, + aux_input_quantized_size)); + } + } } return kTfLiteOk; } @@ -868,8 +871,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteUInt8: { TfLiteTensor* input_quantized = GetTemporary(context, node, kInputQuantized); - TfLiteTensor* aux_input_quantized = - GetTemporary(context, node, kAuxInputQuantized); TfLiteTensor* fw_activation_state_quantized = GetTemporary(context, node, kFwActivationStateQuantized); TfLiteTensor* bw_activation_state_quantized = @@ -884,6 +885,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, kProductScalingFactors); TfLiteTensor* recovered_cell_weights = GetTemporary(context, node, kRecoveredCellWeights); + TfLiteTensor* aux_input_quantized = + (aux_input == nullptr) + ? 
nullptr + : GetTemporary(context, node, kAuxInputQuantized); TfLiteStatus fw_pass_status = lstm_eval::EvalHybrid( input, fw_input_to_input_weights, fw_input_to_forget_weights, -- GitLab From 0f1634bb5e1f056c560a6df93b7367fa4ddd62be Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Wed, 10 Oct 2018 11:53:16 -0700 Subject: [PATCH 215/411] Add convenient method result_shape() to hlo module PiperOrigin-RevId: 216567812 --- tensorflow/compiler/xla/service/hlo_module.h | 10 +++++++++- .../compiler/xla/service/hlo_rematerialization.cc | 2 +- tensorflow/compiler/xla/service/layout_assignment.cc | 5 ++--- .../compiler/xla/service/layout_assignment_test.cc | 3 +-- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 735804e827..509b82c08a 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -99,7 +99,7 @@ class HloModule { HloComputation* DeepCloneComputation(HloComputation* computation, HloCloneContext* context = nullptr); - // Return a pointer to the entry computation of the module.. + // Return a pointer to the entry computation of the module. const HloComputation* entry_computation() const { CHECK_NE(nullptr, entry_computation_); return entry_computation_; @@ -109,6 +109,14 @@ class HloModule { return entry_computation_; } + // Returns the root instruction shape of entry computation. + // + // Precondition: entry_computation_ is not nullptr. + const Shape& result_shape() const { + CHECK_NE(nullptr, entry_computation_); + return entry_computation()->root_instruction()->shape(); + } + // Creates the ComputationLayout which describes the current status of the HLO // module entry computation. 
ComputationLayout compute_computation_layout() const { diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 5ac43808ee..49e46ecd00 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -1215,7 +1215,7 @@ StatusOr HloRematerialization::Run(HloModule* module) { // by the caller. int64 module_output_size = 0; ShapeUtil::ForEachSubshape( - module->entry_computation()->root_instruction()->shape(), + module->result_shape(), [&module_output_size, this](const Shape& subshape, const ShapeIndex& /*index*/) { module_output_size += size_function_(subshape); diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 2cf5fc94ac..be0351fa6b 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -959,9 +959,8 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { FindOrDie(computation_layouts_, module->entry_computation()) .result_layout(); if (result_layout.LayoutIsSet()) { - TF_RET_CHECK(ShapeUtil::Equal( - module->entry_computation()->root_instruction()->shape(), - result_layout.shape())); + TF_RET_CHECK( + ShapeUtil::Equal(module->result_shape(), result_layout.shape())); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index ff6fdb5e4a..a831751fa9 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -1284,8 +1284,7 @@ ENTRY %CustomCallLayoutConstrainedTupleResult (p0: f32[4,4]) -> (f32[4,4]{1,0}, ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})})); AssignLayouts(module.get(), &computation_layout); - ExpectTupleLayoutIs(module->entry_computation()->root_instruction()->shape(), - 
{{1, 0}, {1, 0}}); + ExpectTupleLayoutIs(module->result_shape(), {{1, 0}, {1, 0}}); const HloInstruction* custom_call = FindInstruction(module.get(), "custom-call"); -- GitLab From 069ff62df8f84285fac88fcd1718b34f91aeaa18 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Wed, 10 Oct 2018 12:07:56 -0700 Subject: [PATCH 216/411] Fix mul_test with Clang 8.0.0 PiperOrigin-RevId: 216570443 --- tensorflow/contrib/lite/kernels/mul_test.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/mul_test.cc b/tensorflow/contrib/lite/kernels/mul_test.cc index 2807550a6b..0f9c0c2eee 100644 --- a/tensorflow/contrib/lite/kernels/mul_test.cc +++ b/tensorflow/contrib/lite/kernels/mul_test.cc @@ -107,7 +107,7 @@ TEST(FloatMulOpTest, ActivationRELU_N1_TO_1) { } TEST(FloatMulOpTest, VariousInputShapes) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]}, @@ -124,7 +124,7 @@ TEST(FloatMulOpTest, VariousInputShapes) { } TEST(FloatMulOpTest, WithBroadcast) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]}, @@ -161,7 +161,7 @@ TEST(IntegerMulOpTest, ActivationRELU_N1_TO_1) { } TEST(IntegerMulOpTest, VariousInputShapes) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { IntegerMulOpModel m({TensorType_INT32, test_shapes[i]}, @@ -176,7 +176,7 @@ TEST(IntegerMulOpTest, VariousInputShapes) { } TEST(IntegerMulOpTest, WithBroadcast) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { IntegerMulOpModel m({TensorType_INT32, test_shapes[i]}, @@ 
-245,7 +245,7 @@ float GetTolerance(int min, int max) { TEST(QuantizedMulOpTest, WithBroadcast) { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { QuantizedMulOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, -- GitLab From 7cf8899b2194c3049e65dc136751b2bfa6d79f5d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 12:09:01 -0700 Subject: [PATCH 217/411] Move from deprecated self.test_session() to self.session() or self.cached_session(). Move to cached_session() if the session is create more than once per test. Move to session() otherwise. self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to session() instead which slightly changes the semantic of the function: * the session is not cached anymore (a new session is created). * the session is closed when exiting the "with" scope. 
PiperOrigin-RevId: 216570649 --- .../features/python/clip_weights_test.py | 4 +- .../python/random_tensor_pool_test.py | 10 ++-- .../features/python/virtual_batchnorm_test.py | 14 +++--- .../opt/python/training/adamax_test.py | 2 +- .../opt/python/training/addsign_test.py | 4 +- .../training/lazy_adam_optimizer_test.py | 2 +- .../opt/python/training/powersign_test.py | 4 +- tensorflow/contrib/optimizer_v2/adam_test.py | 2 +- .../optimizer_v2/checkpointable_utils_test.py | 10 ++-- .../contrib/optimizer_v2/rmsprop_test.py | 8 ++-- .../python/kernel_tests/core_rnn_cell_test.py | 2 +- .../rnn/python/kernel_tests/core_rnn_test.py | 46 +++++++++---------- .../rnn/python/kernel_tests/gru_ops_test.py | 12 ++--- .../rnn/python/kernel_tests/lstm_ops_test.py | 20 ++++---- .../rnn/python/kernel_tests/rnn_test.py | 10 ++-- .../kernel_tests/attention_wrapper_test.py | 6 +-- .../python/kernel_tests/basic_decoder_test.py | 14 +++--- .../kernel_tests/beam_search_ops_test.py | 6 +-- .../python/kernel_tests/decoder_test.py | 4 +- .../seq2seq/python/kernel_tests/loss_test.py | 2 +- 20 files changed, 91 insertions(+), 91 deletions(-) diff --git a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py index 2b7bb5f14e..e4fac1976d 100644 --- a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py +++ b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py @@ -47,13 +47,13 @@ class ClipWeightsTest(test.TestCase): train_op1 = opt.minimize(loss, var_list=self.variables) train_op2 = opt_clip.minimize(loss, var_list=self.variables) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: sess.run(variables.global_variables_initializer()) self.assertEqual(2.0, self.variables[0].eval()) sess.run(train_op1) self.assertLess(0.1, self.variables[0].eval()) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: 
sess.run(variables.global_variables_initializer()) self.assertEqual(2.0, self.variables[0].eval()) sess.run(train_op2) diff --git a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py index 08584dcd65..3c9dfd6de0 100644 --- a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py +++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py @@ -37,7 +37,7 @@ class TensorPoolTest(test.TestCase): output_value = tensor_pool(input_value, pool_size=10) self.assertEqual(output_value.shape.as_list(), [None, None, 3]) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: for i in range(10): session.run(output_value, {input_value: [[[i] * 3]]}) session.run(output_value, {input_value: [[[i] * 3] * 2]}) @@ -49,7 +49,7 @@ class TensorPoolTest(test.TestCase): output_value = tensor_pool(input_value, pool_size=10) self.assertEqual(output_value.shape.as_list(), []) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: outs = [] for i in range(50): out = session.run(output_value, {input_value: i}) @@ -67,7 +67,7 @@ class TensorPoolTest(test.TestCase): input_value, pool_size=10, pooling_probability=0.0) self.assertEqual(output_value.shape.as_list(), []) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: for i in range(50): out = session.run(output_value, {input_value: i}) self.assertEqual(out, i) @@ -83,7 +83,7 @@ class TensorPoolTest(test.TestCase): pooling_probability=pooling_probability) self.assertEqual(output_value.shape.as_list(), []) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: not_pooled = 0 total = 1000 for i in range(total): @@ -104,7 +104,7 @@ class TensorPoolTest(test.TestCase): for output_value in output_values: self.assertEqual(output_value.shape.as_list(), 
[]) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: for i in range(10): outs = session.run(output_values, { input_values[0]: i, diff --git a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py index 2fe06a2872..ecfbb8a432 100644 --- a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py +++ b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py @@ -59,7 +59,7 @@ class VirtualBatchnormTest(test.TestCase): mom_mean, mom_var = nn.moments(tensors, axes) vb_var = mean_sq - math_ops.square(vb_mean) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: vb_mean_np, vb_var_np, mom_mean_np, mom_var_np = sess.run([ vb_mean, vb_var, mom_mean, mom_var]) @@ -93,7 +93,7 @@ class VirtualBatchnormTest(test.TestCase): vb_mean = array_ops.squeeze(vb_mean, batch_axis) vb_variance = array_ops.squeeze(vb_variance, batch_axis) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: vb_mean_np, vb_var_np, mom_mean_np, mom_var_np = sess.run([ vb_mean, vb_variance, mom_mean, mom_variance]) @@ -116,7 +116,7 @@ class VirtualBatchnormTest(test.TestCase): vbn = virtual_batchnorm.VBN(batch, axis, batch_axis=batch_axis) vbn_normalized = vbn.reference_batch_normalization() - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: variables_lib.global_variables_initializer().run() bn_normalized_np, vbn_normalized_np = sess.run( @@ -142,7 +142,7 @@ class VirtualBatchnormTest(test.TestCase): vb_normed = array_ops.squeeze( vbn(array_ops.expand_dims(examples[i], [0])), [0]) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: variables_lib.global_variables_initializer().run() bn_np, vb_np = sess.run([batch_normalized, vb_normed]) 
self.assertAllClose(bn_np[i, ...], vb_np) @@ -167,7 +167,7 @@ class VirtualBatchnormTest(test.TestCase): vbn = virtual_batchnorm.VBN(reference_batch) vbn_fixed_example = array_ops.squeeze( vbn(array_ops.expand_dims(fixed_example, 0)), 0) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): variables_lib.global_variables_initializer().run() vbn_fixed_example_np = vbn_fixed_example.eval() @@ -180,7 +180,7 @@ class VirtualBatchnormTest(test.TestCase): minibatch = array_ops.stack([fixed_example] + examples) vbn_minibatch = vbn(minibatch) cur_vbn_fixed_example = vbn_minibatch[0, ...] - with self.test_session(use_gpu=True): + with self.cached_session(use_gpu=True): variables_lib.global_variables_initializer().run() cur_vbn_fixed_example_np = cur_vbn_fixed_example.eval() self.assertAllClose(vbn_fixed_example_np, cur_vbn_fixed_example_np) @@ -219,7 +219,7 @@ class VirtualBatchnormTest(test.TestCase): self.assertEqual(4, len(contrib_variables_lib.get_variables())) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: variables_lib.global_variables_initializer().run() sess.run(to_fetch) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 61d8b94eca..a1e220924f 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -129,7 +129,7 @@ class AdaMaxOptimizerTest(test.TestCase): def testSparseDevicePlacement(self): for index_dtype in [dtypes.int32, dtypes.int64]: - with self.test_session(force_gpu=test.is_gpu_available()): + with self.cached_session(force_gpu=test.is_gpu_available()): # If a GPU is available, tests that all optimizer ops can be placed on # it (i.e. they have GPU kernels). 
var = variables.Variable([[1.0], [2.0]]) diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py index 6150fa117f..2c74acd9ff 100644 --- a/tensorflow/contrib/opt/python/training/addsign_test.py +++ b/tensorflow/contrib/opt/python/training/addsign_test.py @@ -66,7 +66,7 @@ class AddSignTest(test.TestCase): alpha=1.0, beta=0.9): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(use_gpu=True): + with self.cached_session(use_gpu=True): # Initialize variables for numpy implementation. m0, m1 = 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) @@ -169,7 +169,7 @@ class AddSignTest(test.TestCase): alpha=1.0, beta=0.9): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(use_gpu=True): + with self.cached_session(use_gpu=True): # Initialize variables for numpy implementation. m0, m1 = 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) diff --git a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py index 089ecf597d..65ad724b3c 100644 --- a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py @@ -107,7 +107,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase): @parameterized.parameters([False, True]) def testSparseDevicePlacement(self, use_resource): for index_dtype in [dtypes.int32, dtypes.int64]: - with self.test_session(force_gpu=test.is_gpu_available()): + with self.cached_session(force_gpu=test.is_gpu_available()): # If a GPU is available, tests that all optimizer ops can be placed on # it (i.e. they have GPU kernels). 
if use_resource: diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py index 1cf9901dc0..f2c87b5883 100644 --- a/tensorflow/contrib/opt/python/training/powersign_test.py +++ b/tensorflow/contrib/opt/python/training/powersign_test.py @@ -67,7 +67,7 @@ class PowerSignTest(test.TestCase): base=math.e, beta=0.9): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(use_gpu=True): + with self.cached_session(use_gpu=True): # Initialize variables for numpy implementation. m0, m1 = 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) @@ -173,7 +173,7 @@ class PowerSignTest(test.TestCase): py_sign_decay_fn=None, base=math.e, beta=0.9): - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: # Initialize variables for numpy implementation. m0, m1 = 0.0, 0.0 diff --git a/tensorflow/contrib/optimizer_v2/adam_test.py b/tensorflow/contrib/optimizer_v2/adam_test.py index b1ad0ade42..b55739f788 100644 --- a/tensorflow/contrib/optimizer_v2/adam_test.py +++ b/tensorflow/contrib/optimizer_v2/adam_test.py @@ -109,7 +109,7 @@ class AdamOptimizerTest(test.TestCase): def testSparseDevicePlacement(self): for index_dtype in [dtypes.int32, dtypes.int64]: - with self.test_session(force_gpu=test.is_gpu_available()): + with self.cached_session(force_gpu=test.is_gpu_available()): # If a GPU is available, tests that all optimizer ops can be placed on # it (i.e. they have GPU kernels). 
var = variables.Variable([[1.0], [2.0]]) diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py index 3e225ff0dd..6362d424ed 100644 --- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py +++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py @@ -337,7 +337,7 @@ class CheckpointingTests(test.TestCase): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): - with ops.Graph().as_default(), self.test_session( + with ops.Graph().as_default(), self.session( graph=ops.get_default_graph()), test_util.device(use_gpu=True): model = MyModel() optimizer = adam.AdamOptimizer(0.001) @@ -370,7 +370,7 @@ class CheckpointingTests(test.TestCase): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): - with ops.Graph().as_default(), self.test_session( + with ops.Graph().as_default(), self.session( graph=ops.get_default_graph()), test_util.device(use_gpu=True): model = MyModel() # Don't actually train so we can test variable values @@ -688,7 +688,7 @@ class CheckpointCompatibilityTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") with context.graph_mode(): save_graph = ops.Graph() - with save_graph.as_default(), self.test_session( + with save_graph.as_default(), self.session( graph=save_graph) as session: root = self._initialized_model() name_saver = core_saver.Saver() @@ -733,7 +733,7 @@ class CheckpointCompatibilityTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") with context.graph_mode(): save_graph = ops.Graph() - with save_graph.as_default(), self.test_session( + with save_graph.as_default(), self.session( graph=save_graph) as session: root = self._initialized_model() save_path = root.save( @@ -752,7 +752,7 @@ class 
CheckpointCompatibilityTests(test.TestCase): save_path = root.save(file_prefix=checkpoint_prefix) with context.graph_mode(): save_graph = ops.Graph() - with save_graph.as_default(), self.test_session( + with save_graph.as_default(), self.session( graph=save_graph): root = self._initialized_model() self._set_sentinels(root) diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py index 83f5971039..202c1e9afc 100644 --- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py +++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py @@ -89,7 +89,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): def testDense(self, dtype, param_value): (learning_rate, decay, momentum, epsilon, centered, use_resource) = tuple( param_value) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): # Initialize variables for numpy implementation. var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype) @@ -213,7 +213,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): def testSparse(self, dtype, param_value): (learning_rate, decay, momentum, epsilon, centered, _) = tuple( param_value) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): # Initialize variables for numpy implementation. 
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) @@ -287,7 +287,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): @parameterized.parameters(_DATA_TYPES) def testWithoutMomentum(self, dtype): - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) @@ -353,7 +353,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): @parameterized.parameters(_DATA_TYPES) def testWithMomentum(self, dtype): - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index be0306cb07..572df58e52 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -695,7 +695,7 @@ class RNNCellTest(test.TestCase): return gpu_dev = test.gpu_device_name() - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): x = array_ops.zeros([1, 1, 3]) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py index f31ad53d3c..5cba54dd3d 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py @@ -211,7 +211,7 @@ class RNNTest(test.TestCase): self.assertEqual(out.get_shape(), inp.get_shape()) self.assertEqual(out.dtype, inp.dtype) - with 
self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: input_value = np.random.randn(batch_size, input_size) values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value}) @@ -247,7 +247,7 @@ class RNNTest(test.TestCase): self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list()) self.assertEqual(out.dtype, inp.dtype) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: input_value = np.random.randn(batch_size, input_size) values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value}) full_dropout_values = sess.run( @@ -274,7 +274,7 @@ class RNNTest(test.TestCase): cell, inputs, sequence_length=sequence_length, dtype=dtypes.float32) self.assertEqual(len(dynamic_outputs), len(inputs)) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: input_value = np.random.randn(batch_size, input_size) dynamic_values = sess.run( dynamic_outputs, @@ -310,7 +310,7 @@ class RNNTest(test.TestCase): 1.0 * (2 + 1) * np.ones((input_size))))) def _testScope(self, factory, prefix="prefix", use_outer_scope=True): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) @@ -372,7 +372,7 @@ class LSTMTest(test.TestCase): input_size = 5 batch_size = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) cell = rnn_cell.LSTMCell( @@ -394,7 +394,7 @@ class LSTMTest(test.TestCase): input_size = 5 batch_size = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, 
seed=self._seed) cell = rnn_cell.LSTMCell( @@ -424,7 +424,7 @@ class LSTMTest(test.TestCase): input_size = 5 batch_size = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) state_saver = TestStateSaver(batch_size, 2 * num_units) @@ -562,7 +562,7 @@ class LSTMTest(test.TestCase): batch_size = 2 num_proj = 4 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) inputs = max_length * [ @@ -659,7 +659,7 @@ class LSTMTest(test.TestCase): num_proj_shards = 3 num_unit_shards = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) @@ -692,7 +692,7 @@ class LSTMTest(test.TestCase): num_proj_shards = 3 num_unit_shards = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer(-1, 1, seed=self._seed) inputs = max_length * [ array_ops.placeholder(dtypes.float64, shape=(None, input_size)) @@ -728,7 +728,7 @@ class LSTMTest(test.TestCase): num_proj_shards = 3 num_unit_shards = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: inputs = max_length * [ array_ops.placeholder(dtypes.float32, shape=(None, input_size)) ] @@ -784,7 +784,7 @@ class LSTMTest(test.TestCase): num_proj_shards = 3 num_unit_shards = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with 
self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: sequence_length = array_ops.placeholder(dtypes.int64) initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) @@ -1117,7 +1117,7 @@ class LSTMTest(test.TestCase): state_is_tuple=False) ########### Step 1: Run static graph and generate readouts - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: if in_graph_mode: concat_inputs = array_ops.placeholder( dtypes.float32, shape=(time_steps, batch_size, input_size)) @@ -1177,7 +1177,7 @@ class LSTMTest(test.TestCase): static_individual_variable_gradients, feed_dict=feeds) ########## Step 2: Run dynamic graph and generate readouts - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: if in_graph_mode: concat_inputs = array_ops.placeholder( dtypes.float32, shape=(time_steps, batch_size, input_size)) @@ -1337,7 +1337,7 @@ class BidirectionalRNNTest(test.TestCase): return input_value, inputs, outputs, state_fw, state_bw, sequence_length def _testBidirectionalRNN(self, use_shape): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createBidirectionalRNN(use_shape, True)) variables_lib.global_variables_initializer().run() @@ -1384,7 +1384,7 @@ class BidirectionalRNNTest(test.TestCase): self.assertAllClose(s_fw, s_bw) def _testBidirectionalRNNWithoutSequenceLength(self, use_shape): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, _ = ( self._createBidirectionalRNN(use_shape, False)) variables_lib.global_variables_initializer().run() @@ -1472,7 +1472,7 @@ class 
BidirectionalRNNTest(test.TestCase): def _testBidirectionalDynamicRNN(self, use_shape, use_state_tuple, use_time_major, use_sequence_length): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createBidirectionalDynamicRNN( use_shape, use_state_tuple, use_time_major, use_sequence_length)) @@ -1549,7 +1549,7 @@ class BidirectionalRNNTest(test.TestCase): # REMARKS: factory(scope) is a function accepting a scope # as an argument, such scope can be None, a string # or a VariableScope instance. - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) @@ -1868,7 +1868,7 @@ class StateSaverRNNTest(test.TestCase): batch_size = 2 state_saver = TestStateSaver(batch_size, 2 * num_units) - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: self._factory(scope=scope, state_saver=state_saver) @@ -1945,7 +1945,7 @@ class GRUTest(test.TestCase): sequence_length = np.random.randint(0, time_steps, size=batch_size) - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: concat_inputs = array_ops.placeholder( dtypes.float32, shape=(time_steps, batch_size, input_size)) @@ -1967,7 +1967,7 @@ class GRUTest(test.TestCase): sess.run([outputs_dynamic, state_dynamic], feed_dict=feeds) def _testScope(self, factory, prefix="prefix", use_outer_scope=True): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) @@ 
-2253,7 +2253,7 @@ class RawRNNTest(test.TestCase): np.ones((max_time, batch_size, 1), np.int64), output_vals[1]) def _testScope(self, factory, prefix="prefix", use_outer_scope=True): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) @@ -2370,7 +2370,7 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): sequence_length=sequence_length, dtype=dtypes.float32) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: opts = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() variables_lib.global_variables_initializer().run() diff --git a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py index b865466cc7..50d0da6eaf 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py @@ -41,7 +41,7 @@ from tensorflow.python.training import gradient_descent class GRUBlockCellTest(test.TestCase): def testNoneDimsWithDynamicRNN(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 4 cell_size = 5 input_size = 6 @@ -58,7 +58,7 @@ class GRUBlockCellTest(test.TestCase): sess.run(output, feed) def testBlockGRUToGRUCellSingleStep(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 4 cell_size = 5 input_size = 6 @@ -91,7 +91,7 @@ class GRUBlockCellTest(test.TestCase): self.assertAllClose(block, basic) def testBlockGRUToGRUCellMultiStep(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 2 cell_size = 3 
input_size = 3 @@ -150,7 +150,7 @@ class GRUBlockCellTest(test.TestCase): self.assertAllClose(block_res[1], block_res[1]) def testDerivativeOfBlockGRUToGRUCellSingleStep(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 2 cell_size = 3 input_size = 4 @@ -220,7 +220,7 @@ class GRUBlockCellTest(test.TestCase): cell_size = 3 input_size = 4 time_steps = 2 - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: # Random initializers. seed = 1994 initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed) @@ -287,7 +287,7 @@ class GRUBlockCellTest(test.TestCase): self.assertAllClose(block, basic) def testGradient(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 1 cell_size = 3 input_size = 2 diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py index ffd2421894..9ce0b399ba 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py @@ -130,7 +130,7 @@ def blocks_match(sess, use_peephole): class LSTMBlockCellTest(test.TestCase): def testNoneDimsWithDynamicRNN(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 4 num_steps = 5 input_dim = 6 @@ -147,7 +147,7 @@ class LSTMBlockCellTest(test.TestCase): sess.run(output, feed) def testLSTMBlockCell(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): x = array_ops.zeros([1, 2]) @@ -175,7 +175,7 @@ class 
LSTMBlockCellTest(test.TestCase): self.assertAllClose(res[4], [[0.24024698, 0.24024698]]) def testCompatibleNames(self): - with self.test_session(use_gpu=True, graph=ops.Graph()): + with self.session(use_gpu=True, graph=ops.Graph()): cell = rnn_cell.LSTMCell(10) pcell = rnn_cell.LSTMCell(10, use_peepholes=True) inputs = [array_ops.zeros([4, 5])] * 6 @@ -186,7 +186,7 @@ class LSTMBlockCellTest(test.TestCase): for v in variables.trainable_variables() } - with self.test_session(use_gpu=True, graph=ops.Graph()): + with self.session(use_gpu=True, graph=ops.Graph()): cell = lstm_ops.LSTMBlockCell(10) pcell = lstm_ops.LSTMBlockCell(10, use_peephole=True) inputs = [array_ops.zeros([4, 5])] * 6 @@ -197,7 +197,7 @@ class LSTMBlockCellTest(test.TestCase): for v in variables.trainable_variables() } - with self.test_session(use_gpu=True, graph=ops.Graph()): + with self.session(use_gpu=True, graph=ops.Graph()): cell = lstm_ops.LSTMBlockFusedCell(10) pcell = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True) inputs = array_ops.stack([array_ops.zeros([4, 5])] * 6) @@ -212,7 +212,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertEqual(basic_names, fused_names) def testLSTMBasicToBlockCell(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: x = array_ops.zeros([1, 2]) x_values = np.random.randn(1, 2) @@ -262,7 +262,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertAllClose(basic, block) def testLSTMBasicToBlockCellPeeping(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: x = array_ops.zeros([1, 2]) x_values = np.random.randn(1, 2) @@ -315,7 +315,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertAllClose(basic, block) def testLSTMBasicToBlock(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: (basic_state, fused_state, basic_outputs, block_outputs, fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads, 
block_wgrads, fused_wgrads) = blocks_match( @@ -333,7 +333,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertAllClose(basic, fused, rtol=1e-6, atol=1e-6) def testLSTMBasicToBlockPeeping(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: (basic_state, fused_state, basic_outputs, block_outputs, fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads, block_wgrads, fused_wgrads) = blocks_match( @@ -352,7 +352,7 @@ class LSTMBlockCellTest(test.TestCase): def testLSTMFusedSequenceLengths(self): """Verify proper support for sequence lengths in LSTMBlockFusedCell.""" - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: batch_size = 3 input_size = 4 cell_size = 5 diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py index eda8cb3c12..32df1db964 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py @@ -99,7 +99,7 @@ class StackBidirectionalRNNTest(test.TestCase): return input_value, inputs, outputs, state_fw, state_bw, sequence_length def _testStackBidirectionalRNN(self, use_gpu, use_shape): - with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess: + with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createStackBidirectionalRNN(use_gpu, use_shape, True)) variables.global_variables_initializer().run() @@ -159,7 +159,7 @@ class StackBidirectionalRNNTest(test.TestCase): # - Check that the state_5 and state_5' (forward and backward) are the # same for the first layer (it does not apply for the second layer since # it has forward-backward dependencies). - with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess: + with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: batch_size = 2 # Create states placeholders. 
initial_states_fw = [ @@ -281,7 +281,7 @@ class StackBidirectionalRNNTest(test.TestCase): def _testStackBidirectionalDynamicRNN(self, use_gpu, use_shape, use_state_tuple): - with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess: + with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createStackBidirectionalDynamicRNN(use_gpu, use_shape, use_state_tuple)) @@ -343,7 +343,7 @@ class StackBidirectionalRNNTest(test.TestCase): # - Check that the state_5 and state_5' (forward and backward) are the # same for the first layer (it does not apply for the second layer since # it has forward-backward dependencies). - with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess: + with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: batch_size = 2 # Create states placeholders. initial_states_fw = [ @@ -414,7 +414,7 @@ class StackBidirectionalRNNTest(test.TestCase): # REMARKS: factory(scope) is a function accepting a scope # as an argument, such scope can be None, a string # or a VariableScope instance. 
- with self.test_session(use_gpu=True, graph=ops.Graph()): + with self.session(use_gpu=True, graph=ops.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index 1f3b533de9..c1e36b2ea3 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -181,7 +181,7 @@ class AttentionWrapperTest(test.TestCase): for creator, depth in zip(create_attention_mechanisms, attention_mechanism_depths)] - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with vs.variable_scope( 'root', initializer=init_ops.random_normal_initializer(stddev=0.01, seed=3)): @@ -724,7 +724,7 @@ class AttentionWrapperTest(test.TestCase): def testBahdanauMonotonicHard(self): # Run attention mechanism with mode='hard', make sure probabilities are hard b, t, u, d = 10, 20, 30, 40 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: a = wrapper.BahdanauMonotonicAttention( d, random_ops.random_normal((b, t, u)), @@ -898,7 +898,7 @@ class AttentionWrapperTest(test.TestCase): def testLuongMonotonicHard(self): # Run attention mechanism with mode='hard', make sure probabilities are hard b, t, u, d = 10, 20, 30, 40 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: a = wrapper.LuongMonotonicAttention( d, random_ops.random_normal((b, t, u)), diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py index fa3f074c67..b7f9f3fb09 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py @@ -50,7 +50,7 @@ 
class BasicDecoderTest(test.TestCase): cell_depth = 10 output_layer_depth = 3 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: inputs = np.random.randn(batch_size, max_time, input_depth).astype(np.float32) cell = rnn_cell.LSTMCell(cell_depth) @@ -136,7 +136,7 @@ class BasicDecoderTest(test.TestCase): start_tokens = np.random.randint(0, vocabulary_size, size=batch_size) end_token = 1 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: embeddings = np.random.randn(vocabulary_size, input_depth).astype(np.float32) cell = rnn_cell.LSTMCell(vocabulary_size) @@ -209,7 +209,7 @@ class BasicDecoderTest(test.TestCase): start_tokens = np.random.randint(0, vocabulary_size, size=batch_size) end_token = 1 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( "testStepWithSampleEmbeddingHelper", initializer=init_ops.constant_initializer(0.01)): @@ -278,7 +278,7 @@ class BasicDecoderTest(test.TestCase): input_depth = 7 vocabulary_size = 10 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: inputs = np.random.randn( batch_size, max_time, input_depth).astype(np.float32) embeddings = np.random.randn( @@ -371,7 +371,7 @@ class BasicDecoderTest(test.TestCase): else: auxiliary_inputs = None - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: inputs = np.random.randn(batch_size, max_time, input_depth).astype(np.float32) cell = rnn_cell.LSTMCell(cell_depth) @@ -523,7 +523,7 @@ class BasicDecoderTest(test.TestCase): lambda x: array_ops.one_hot(x, vocabulary_size, dtype=dtypes.float32)) end_fn = lambda sample_ids: math_ops.equal(sample_ids, end_token) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( "testStepWithInferenceHelper", initializer=init_ops.constant_initializer(0.01)): @@ 
-604,7 +604,7 @@ class BasicDecoderTest(test.TestCase): next_inputs_fn = math_ops.to_float end_fn = lambda sample_ids: sample_ids[:, end_token] - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( "testStepWithInferenceHelper", initializer=init_ops.constant_initializer(0.01)): diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py index 9662a5780a..b41734d214 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py @@ -49,7 +49,7 @@ class GatherTreeTest(test.TestCase): parent_ids=parent_ids, max_sequence_lengths=max_sequence_lengths, end_token=end_token) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): self.assertAllEqual(expected_result, beams.eval()) def testBadParentValuesOnCPU(self): @@ -93,7 +93,7 @@ class GatherTreeTest(test.TestCase): parent_ids=parent_ids, max_sequence_lengths=max_sequence_lengths, end_token=end_token) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): self.assertAllEqual(expected_result, beams.eval()) def testGatherTreeBatch(self): @@ -103,7 +103,7 @@ class GatherTreeTest(test.TestCase): max_sequence_lengths = [0, 1, 2, 4, 7, 8, 9, 10, 11, 0] end_token = 5 - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): step_ids = np.random.randint( 0, high=end_token + 1, size=(max_time, batch_size, beam_width)) parent_ids = np.random.randint( diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py index b549cbf568..4c25489fad 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py @@ -44,7 +44,7 @@ class 
DynamicDecodeRNNTest(test.TestCase): cell_depth = 10 max_out = max(sequence_length) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: if time_major: inputs = np.random.randn(max_time, batch_size, input_depth).astype(np.float32) @@ -126,7 +126,7 @@ class DynamicDecodeRNNTest(test.TestCase): cell_depth = 10 max_out = max(sequence_length) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: inputs = np.random.randn(batch_size, max_time, input_depth).astype(np.float32) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py index 35c601a4bc..5aa32b532f 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py @@ -34,7 +34,7 @@ from tensorflow.python.platform import test class LossTest(test.TestCase): def testSequenceLoss(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( 'root', initializer=init_ops.constant_initializer(0.5)): batch_size = 2 -- GitLab From b77648b3418f0cc66f249226c77a79d5d34f7618 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 10 Oct 2018 12:15:48 -0700 Subject: [PATCH 218/411] Modify getqualifiedname to iterate over a copy of the namespace, to avoid "dictionary changed size during iteration" errors. 
PiperOrigin-RevId: 216571661 --- tensorflow/python/autograph/pyct/inspect_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py index 29c406c248..1fc3c6006d 100644 --- a/tensorflow/python/autograph/pyct/inspect_utils.py +++ b/tensorflow/python/autograph/pyct/inspect_utils.py @@ -92,7 +92,10 @@ def getqualifiedname(namespace, object_, max_depth=2): # TODO(mdan): Use breadth-first search and avoid visiting modules twice. if max_depth: - for name, value in namespace.items(): + # Iterating over a copy prevents "changed size due to iteration" errors. + # It's unclear why those occur - suspecting new modules may load during + # iteration. + for name, value in namespace.copy().items(): if tf_inspect.ismodule(value): name_in_module = getqualifiedname(value.__dict__, object_, max_depth - 1) -- GitLab From b6335dfe51ac1ac6c947c71577f41a24a13fe547 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 12:32:43 -0700 Subject: [PATCH 219/411] Internal change. PiperOrigin-RevId: 216574118 --- .../kernels/bidirectional_sequence_lstm.cc | 57 ++++++++++++++++--- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc index 1137f05fa6..0d9863ae8d 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc @@ -151,7 +151,7 @@ void Free(TfLiteContext* context, void* buffer) { } // Check that input tensor dimensions matches with each other. 
-TfLiteStatus CheckLstmTensorDimensions( +TfLiteStatus CheckLstmTensorDimensionsAndTypes( TfLiteContext* context, TfLiteNode* node, int n_input, int n_output, int n_cell, int input_to_input_weights_tensor, int input_to_forget_weights_tensor, int input_to_cell_weights_tensor, @@ -172,25 +172,39 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE(context, params->cell_clip >= 0); TF_LITE_ENSURE(context, params->proj_clip >= 0); + const TfLiteTensor* input_to_forget_weights = + GetInput(context, node, input_to_forget_weights_tensor); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); + TF_LITE_ENSURE(context, (input_to_forget_weights->type == kTfLiteFloat32) || + (input_to_forget_weights->type == kTfLiteUInt8)); + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, input_to_input_weights_tensor); if (input_to_input_weights) { TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input); + TF_LITE_ENSURE_EQ(context, input_to_input_weights->type, + input_to_forget_weights->type); } - const TfLiteTensor* input_to_forget_weights = - GetInput(context, node, input_to_forget_weights_tensor); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); - const TfLiteTensor* input_to_cell_weights = GetInput(context, node, input_to_cell_weights_tensor); TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, 
input_to_cell_weights->dims->data[1], n_input); + TF_LITE_ENSURE_EQ(context, input_to_cell_weights->type, + input_to_forget_weights->type); + + const TfLiteTensor* input_to_output_weights = + GetInput(context, node, input_to_output_weights_tensor); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[1], n_input); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->type, + input_to_forget_weights->type); const TfLiteTensor* recurrent_to_input_weights = GetOptionalInputTensor(context, node, recurrent_to_input_weights_tensor); @@ -200,6 +214,8 @@ TfLiteStatus CheckLstmTensorDimensions( n_cell); TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[1], n_output); + TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->type, + input_to_forget_weights->type); } const TfLiteTensor* recurrent_to_forget_weights = @@ -209,6 +225,8 @@ TfLiteStatus CheckLstmTensorDimensions( n_cell); TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[1], n_output); + TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->type, + input_to_forget_weights->type); const TfLiteTensor* recurrent_to_cell_weights = GetInput(context, node, recurrent_to_cell_weights_tensor); @@ -216,6 +234,8 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1], n_output); + TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->type, + input_to_forget_weights->type); // We make sure the input-gate's parameters are either both present (regular // LSTM) or not at all (CIFG-LSTM). 
@@ -231,6 +251,8 @@ TfLiteStatus CheckLstmTensorDimensions( if (cell_to_input_weights) { TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, cell_to_input_weights->type, + input_to_forget_weights->type); } const TfLiteTensor* cell_to_forget_weights = @@ -238,6 +260,8 @@ TfLiteStatus CheckLstmTensorDimensions( if (cell_to_forget_weights) { TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->type, + input_to_forget_weights->type); } const TfLiteTensor* cell_to_output_weights = @@ -245,6 +269,8 @@ TfLiteStatus CheckLstmTensorDimensions( if (cell_to_output_weights) { TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, cell_to_output_weights->type, + input_to_forget_weights->type); } // Making sure the peephole weights are there all or none. 
@@ -266,22 +292,26 @@ TfLiteStatus CheckLstmTensorDimensions( } else { TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_gate_bias->type, kTfLiteFloat32); } const TfLiteTensor* forget_gate_bias = GetInput(context, node, forget_gate_bias_tensor); TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, forget_gate_bias->type, kTfLiteFloat32); const TfLiteTensor* cell_bias = GetInput(context, node, cell_gate_bias_tensor); TF_LITE_ENSURE_EQ(context, cell_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_bias->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, cell_bias->type, kTfLiteFloat32); const TfLiteTensor* output_gate_bias = GetInput(context, node, output_gate_bias_tensor); TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, output_gate_bias->type, kTfLiteFloat32); const TfLiteTensor* projection_weights = GetOptionalInputTensor(context, node, projection_weights_tensor); @@ -289,6 +319,8 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output); TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); + TF_LITE_ENSURE_EQ(context, projection_weights->type, + input_to_forget_weights->type); } const TfLiteTensor* projection_bias = @@ -296,6 +328,7 @@ TfLiteStatus CheckLstmTensorDimensions( if (projection_bias) { TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output); + TF_LITE_ENSURE_EQ(context, projection_bias->type, kTfLiteFloat32); } // Making sure the projection tensors are consistent: @@ -315,7 +348,7 @@ TfLiteStatus 
CheckInputTensorDimensions(TfLiteContext* context, int n_output, int n_cell) { TF_LITE_ENSURE_OK( context, - CheckLstmTensorDimensions( + CheckLstmTensorDimensionsAndTypes( context, node, n_input, n_output, n_cell, kFwInputToInputWeightsTensor, kFwInputToForgetWeightsTensor, kFwInputToCellWeightsTensor, kFwInputToOutputWeightsTensor, @@ -329,7 +362,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE_OK( context, - CheckLstmTensorDimensions( + CheckLstmTensorDimensionsAndTypes( context, node, n_input, n_output, n_cell, kBwInputToInputWeightsTensor, kBwInputToForgetWeightsTensor, kBwInputToCellWeightsTensor, kBwInputToOutputWeightsTensor, @@ -379,12 +412,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->data[1], n_input); + TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->type, + fw_input_to_output_weights->type); const TfLiteTensor* fw_recurrent_to_output_weights = GetInput(context, node, kFwRecurrentToOutputWeightsTensor); TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->dims->data[0], n_fw_cell); + TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->type, + fw_input_to_output_weights->type); const int n_fw_output = fw_recurrent_to_output_weights->dims->data[1]; const TfLiteTensor* bw_recurrent_to_output_weights = @@ -392,6 +429,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->data[0], n_bw_cell); + TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->type, + fw_input_to_output_weights->type); const int n_bw_output = bw_recurrent_to_output_weights->dims->data[1]; // Check that input tensor dimensions matches with each 
other. -- GitLab From dbbb44ec54ea4d8ae028eed6cae9240112d8340c Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 10 Oct 2018 13:00:46 -0700 Subject: [PATCH 220/411] [XLA] Finish NumUniqueInstructionIds() cleanup. NumUniqueInstructionIds() is no longer a good proxy for the number of instructions. Remove the API, and use the real number of instructions instead of the remaining uses. PiperOrigin-RevId: 216578465 --- .../compiler/xla/service/dfs_hlo_visitor.cc | 4 +- .../compiler/xla/service/dfs_hlo_visitor.h | 56 ++++--------------- .../compiler/xla/service/hlo_instruction.cc | 2 +- tensorflow/compiler/xla/service/hlo_module.h | 4 -- .../xla/service/tuple_points_to_analysis.cc | 2 +- .../xla/service/tuple_points_to_analysis.h | 20 +++++-- 6 files changed, 31 insertions(+), 57 deletions(-) diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc index 3e7373adc5..c54f81e691 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc @@ -50,7 +50,7 @@ void DfsHloVisitorBase::SetVisiting( const HloInstruction& instruction) { VLOG(3) << "marking HLO " << &instruction << " as visiting: "; DCHECK(NotVisited(instruction)); - visit_state_.SetState(instruction.unique_id(), VisitState::kVisiting); + visit_state_[instruction.unique_id()] = VisitState::kVisiting; } template @@ -58,7 +58,7 @@ void DfsHloVisitorBase::SetVisited( const HloInstruction& instruction) { VLOG(3) << "marking HLO " << &instruction << " as visited: "; DCHECK(NotVisited(instruction) || IsVisiting(instruction)); - visit_state_.SetState(instruction.unique_id(), VisitState::kVisited); + visit_state_[instruction.unique_id()] = VisitState::kVisited; } template diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index 68d01d75a2..4159aa281f 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ 
b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/literal.h" @@ -263,21 +264,25 @@ class DfsHloVisitorBase { kVisited = 2, }; - VisitState GetVisitState(int id) { return visit_state_.GetState(id); } + VisitState GetVisitState(int id) { + auto iter = visit_state_.find(id); + if (iter == visit_state_.end()) { + return VisitState::kNotVisited; + } + return iter->second; + } VisitState GetVisitState(const HloInstruction& instruction); // Resize internal state if necessary to hold state for ids <= num. // This call is purely a performance hint and can be omitted without // affecting correctness. - void ReserveVisitStates(int num) { visit_state_.Reserve(num); } + void ReserveVisitStates(int num) { visit_state_.reserve(num); } // Useful when we want to visit the same computation more than once with the // same visitor. - void ResetVisitStates() { visit_state_.Reset(); } + void ResetVisitStates() { visit_state_.clear(); } - void SetVisitState(int id, VisitState state) { - visit_state_.SetState(id, state); - } + void SetVisitState(int id, VisitState state) { visit_state_[id] = state; } // Sets the visitation state of the given instruction as kVisiting. 
// @@ -326,44 +331,7 @@ class DfsHloVisitorBase { virtual Status Postprocess(HloInstructionPtr hlo); private: - class DFSVisitStates { - public: - DFSVisitStates() {} - void Reserve(uint64 num) { - states_.reserve((num + kStatesPerWord - 1) / kStatesPerWord); - } - VisitState GetState(uint64 id) { - uint64 word_index = id / kStatesPerWord; - if (word_index >= states_.size()) { - return VisitState::kNotVisited; - } - static_assert(static_cast(VisitState::kVisited) < 3, - "VisitState must fit in two bits"); - uint64 w = states_[word_index]; - uint32 shift = 2 * (id % kStatesPerWord); // 2 bits per state - return static_cast((w >> shift) & 0x3); - } - void SetState(uint64 id, VisitState state) { - uint64 word_index = id / kStatesPerWord; - if (word_index >= states_.size()) { - states_.resize(word_index + 1, 0); - } - uint64* w = &states_[word_index]; - uint32 shift = 2 * (id % kStatesPerWord); // 2 bits per state - uint64 mask = 0x3ull << shift; - *w = (*w & ~mask) | (static_cast(state) << shift); - DCHECK_EQ(GetState(id), state); - } - void Reset() { states_.clear(); } - - private: - static const uint32 kStatesPerWord = sizeof(uint64) / 2 /*bits per entry*/; - // Map from id to two-bit states. 
We store 32 such states per 64-bit - // value - std::vector states_; - }; - - DFSVisitStates visit_state_; + absl::flat_hash_map visit_state_; TF_DISALLOW_COPY_AND_ASSIGN(DfsHloVisitorBase); }; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index c317e9e3b4..306d29a2ae 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2483,7 +2483,7 @@ template static Status PostOrderDFS(HloInstruction* root, Visitor* visitor, const InternalCompareFunction* operand_order, bool ignore_control_predecessors) { - visitor->ReserveVisitStates(root->GetModule()->NumUniqueInstructionIds()); + visitor->ReserveVisitStates(root->GetModule()->instruction_count()); // dfs_stack holds pairs of unique_id(), HloInstruction*>. // diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 509b82c08a..a1ffdc4023 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -220,10 +220,6 @@ class HloModule { return result; } - // Returns the number of unique intruction ids given out. All ids up to - // this point are guaranteed to be in the range [0..NumUniqueInstructionIds()) - int NumUniqueInstructionIds() const { return next_unique_id_; } - // Returns an id that is unique to this module across all modules created over // the lifetime of this process. 
int unique_id() const { return unique_id_; } diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index ef4e69180d..96f3055c98 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -148,7 +148,7 @@ TuplePointsToAnalysis::Run(const HloModule* module) { Status TuplePointsToAnalysis::Analyze() { per_instruction_.clear(); - per_instruction_.resize(module_->NumUniqueInstructionIds()); + per_instruction_.reserve(module_->instruction_count()); logical_buffer_aliases_.clear(); logical_buffer_aliases_.resize( diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index 30c365053c..bcfcb388f9 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -23,6 +23,7 @@ limitations under the License. 
#include #include +#include "absl/container/flat_hash_map.h" #include "absl/container/inlined_vector.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" @@ -315,14 +316,23 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { const PerInstruction* PerInst(const HloInstruction* inst) const { int id = inst->unique_id(); DCHECK_GE(id, 0); - DCHECK_LT(id, per_instruction_.size()); - return &per_instruction_[id]; + auto iter = per_instruction_.find(id); + if (iter == per_instruction_.end()) { + LOG(FATAL) << "Expected per-instruction information to already exist"; + } else { + return iter->second.get(); + } } PerInstruction* PerInst(const HloInstruction* inst) { int id = inst->unique_id(); DCHECK_GE(id, 0); - DCHECK_LT(id, per_instruction_.size()); - return &per_instruction_[id]; + auto iter = per_instruction_.find(id); + if (iter == per_instruction_.end()) { + return per_instruction_.emplace(id, absl::make_unique()) + .first->second.get(); + } else { + return iter->second.get(); + } } std::vector> GetAllUsesOfInstructionAtIndex( @@ -339,7 +349,7 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { const std::unique_ptr logical_buffer_analysis_; // A map from instruction->unique_id() to - std::vector per_instruction_; + absl::flat_hash_map> per_instruction_; // A map from LogicalBuffer->id() to alias information about that logical // buffer -- GitLab From 7ea8eca41a309b98355c4ed0dae0f5b176590dcd Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Wed, 10 Oct 2018 13:11:14 -0700 Subject: [PATCH 221/411] Pass rewriter config into PartitionedCallOp and run the grappler optimization pass Grappler is disabled by default. Can be controlled through the config argument to enable_eager_execution(), by explicitly setting graph_options.rewrite_options.disable_meta_optimizer to False. 
Benchmarks (with grappler enabled): benchmark_defun_matmul_100_by_784_CPU Before: 353.947162628 After: 322.019815445 benchmark_defun_matmul_2_by_2_CPU Before: 76.9423325857 After: 72.8042999903 benchmark_defun_matmul_2_by_2_CPU_async Before: 38.6896689733 After: 39.6879593531 benchmark_defun_matmul_forward_backward_2_by_2_CPU Before: 189.12516435 After: 182.339771589 benchmark_defun_matmul_forward_backward_2_by_2_CPU_async Before: 222.475560506 After: 245.545005798 benchmark_defun_with_signature Before: 120.702934265 After: 122.84330527 benchmark_defun_with_signature_and_kwargs Before: 129.103032748 After: 140.212361018 benchmark_defun_without_signature Before: 63.2988293966 After: 62.6370668411 benchmark_defun_without_signature_and_with_kwargs Before: 73.5487699509 After: 73.1824000676 PiperOrigin-RevId: 216580246 --- .../grappler/optimizers/meta_optimizer.cc | 2 + tensorflow/core/kernels/BUILD | 4 + .../core/kernels/partitioned_function_ops.cc | 85 ++++++++++++++++--- tensorflow/core/ops/functional_ops.cc | 2 + tensorflow/python/BUILD | 1 + tensorflow/python/eager/context.py | 46 +++++++++- tensorflow/python/eager/function.py | 3 +- tensorflow/python/eager/function_test.py | 14 +++ tensorflow/python/ops/functional_ops.py | 37 +++++++- 9 files changed, 174 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 225c0a91e3..b8f4599f56 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -123,6 +123,8 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( MetaOptimizer::MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) : cpu_device_(cpu_device), cfg_(cfg) { + DCHECK(cpu_device_ == nullptr || + cpu_device_->attributes().device_type() == "CPU"); // TODO(rmlarsen): Increase kNumThreads to, say, port::NumSchedulableCPUs() // if we want to the threadpool for parallelizing Grappler 
const int kNumThreads = 1; diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 1ca9c7b7f5..f0a2924378 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2127,6 +2127,10 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:functional_ops_op_lib", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler/clusters:virtual_cluster", + "//tensorflow/core/grappler/optimizers:meta_optimizer", ], ) diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index 3979e4b53a..b2b3cef59b 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -24,6 +24,10 @@ limitations under the License. #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/graph_partition.h" +#include "tensorflow/core/grappler/clusters/virtual_cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/meta_optimizer.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" #include "tensorflow/core/util/ptr_util.h" #include "tensorflow/core/util/reffed_status_callback.h" @@ -35,7 +39,6 @@ namespace tensorflow { typedef FunctionLibraryRuntime::Handle FHandle; namespace { - // A `PartitionedCallOp` asynchronously executes a function, potentially across // multiple devices but within a single process. 
The kernel places and // partitions a given function's underlying graph, and executes each of the @@ -46,6 +49,12 @@ class PartitionedCallOp : public AsyncOpKernel { public: explicit PartitionedCallOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); + string rewriter_config_serialized; + OP_REQUIRES_OK(ctx, ctx->GetAttr("config", &rewriter_config_serialized)); + OP_REQUIRES( + ctx, rewriter_config_.ParseFromString(rewriter_config_serialized), + errors::InvalidArgument("Unable to parse rewriter_config string as " + "tensorflow::RewriterConfig proto.")); } ~PartitionedCallOp() override {} @@ -109,8 +118,7 @@ class PartitionedCallOp : public AsyncOpKernel { // by name. auto graph = tensorflow::MakeUnique(fbody->graph->flib_def()); FunctionLibraryDefinition global_flib(OpRegistry::Global(), {}); - TF_CHECK_OK( - graph.get()->AddFunctionLibrary(global_flib.ToProto())); + TF_CHECK_OK(graph->AddFunctionLibrary(global_flib.ToProto())); CopyGraph(*fbody->graph, graph.get()); OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done); @@ -159,6 +167,17 @@ class PartitionedCallOp : public AsyncOpKernel { optimization_options), done); + Device* cpu_device; + OP_REQUIRES_OK_ASYNC( + ctx, lib->device_mgr()->LookupDevice("CPU:0", &cpu_device), done); + + // Run grappler passes on the graph. It is possible that these are + // optimized by the graph executor already. 
+ OP_REQUIRES_OK_ASYNC(ctx, + OptimizeGraph(ctx, fbody->ret_nodes, overlay_lib, + device_set, cpu_device, &graph), + done); + std::unordered_map> subgraphs; OP_REQUIRES_OK_ASYNC( ctx, PartitionHelper(device_set, std::move(graph), &subgraphs), @@ -266,8 +285,7 @@ class PartitionedCallOp : public AsyncOpKernel { for (const auto& partition : partitions) { std::unique_ptr subgraph(new Graph(graph->flib_def())); FunctionLibraryDefinition global_flib(OpRegistry::Global(), {}); - TF_CHECK_OK( - subgraph.get()->AddFunctionLibrary(global_flib.ToProto())); + TF_CHECK_OK(subgraph->AddFunctionLibrary(global_flib.ToProto())); GraphConstructorOptions opts; opts.allow_internal_ops = true; opts.expect_device_spec = true; @@ -317,14 +335,6 @@ class PartitionedCallOp : public AsyncOpKernel { } } - // Rewrite the indices of the Arg and Retval nodes for this function - // to range from 0 to the number of Arg nodes, Retval nodes, respectively. - auto sort_by_index = [](std::pair one, - std::pair two) -> bool { - return one.second < two.second; - }; - std::sort(arg_nodes.begin(), arg_nodes.end(), sort_by_index); - std::sort(ret_nodes.begin(), ret_nodes.end(), sort_by_index); for (int i = 0; i < arg_nodes.size(); ++i) { Node* arg = arg_nodes[i].first; arg->AddAttr("index", i); @@ -470,7 +480,56 @@ class PartitionedCallOp : public AsyncOpKernel { } } + Status OptimizeGraph(OpKernelContext* ctx, + const gtl::InlinedVector& ret_nodes, + FunctionLibraryDefinition* flib, + const DeviceSet& device_set, Device* cpu_device, + std::unique_ptr* graph) { + if (!tensorflow::grappler::MetaOptimizerEnabled(rewriter_config_)) { + return Status::OK(); + } + + tensorflow::grappler::GrapplerItem item; + + // Add fetches so that the graph can be pruned. 
+ for (Node* node : ret_nodes) { + item.fetch.push_back(node->name()); + } + + (*graph)->ToGraphDef(&item.graph); + + if (flib) { + *item.graph.mutable_library() = flib->ToProto(); + } + + tensorflow::GraphDef out_graph; + + tensorflow::grappler::VirtualCluster cluster(&device_set); + + // TODO(nareshmodi): Consider adding and using the more generic GraphOptions + // proto (which also contain the OptimizerOptions). + TF_RETURN_IF_ERROR(tensorflow::grappler::RunMetaOptimizer( + item, rewriter_config_, cpu_device, &cluster, &out_graph)); + + std::unique_ptr optimized_graph(new Graph(OpRegistry::Global())); + TF_RETURN_IF_ERROR(ConvertGraphDefToGraph( + GraphConstructorOptions(), out_graph, optimized_graph.get())); + + *graph = std::move(optimized_graph); + + // The graph conversion sets the requested device names but not the + // assigned device names. However, since at this point the graph is + // placed TF expects an assigned device name for every node. Therefore + // we copy the requested device into the assigned device field. + for (Node* node : graph->get()->nodes()) { + node->set_assigned_device_name(node->requested_device()); + } + + return Status::OK(); + } + NameAttrList func_; + RewriterConfig rewriter_config_; string local_device_name_; // Contains maps from device names to handles of function partitions, keyed by // FunctionLibraryRuntime pointers. 
(Because this kernel may be instantiated diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc index 22b4b07eff..17b15108a9 100644 --- a/tensorflow/core/ops/functional_ops.cc +++ b/tensorflow/core/ops/functional_ops.cc @@ -225,6 +225,7 @@ REGISTER_OP("PartitionedCall") .Attr("Tin: list(type) >= 0") .Attr("Tout: list(type) >= 0") .Attr("f: func") + .Attr("config: string = ''") .SetShapeFn(shape_inference::UnknownShape); REGISTER_OP("StatefulPartitionedCall") @@ -233,6 +234,7 @@ REGISTER_OP("StatefulPartitionedCall") .Attr("Tin: list(type) >= 0") .Attr("Tout: list(type) >= 0") .Attr("f: func") + .Attr("config: string = ''") .SetIsStateful() .SetShapeFn(shape_inference::UnknownShape); diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 18ade384f5..c4b5be7a34 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1154,6 +1154,7 @@ py_library( ":tensor_shape", ":util", ":variable_scope", + "//tensorflow/core:protos_all_py", "//tensorflow/python/eager:context", ], ) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 778ff85342..74e648ee6f 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -25,6 +25,7 @@ import random import threading from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.framework import c_api_util from tensorflow.python.framework import device as pydev @@ -84,7 +85,7 @@ class _EagerTensorCache(object): class _EagerContext(threading.local): """Thread local eager context.""" - def __init__(self): + def __init__(self, config=None): super(_EagerContext, self).__init__() self.device_spec = pydev.DeviceSpec.from_string("") self.device_name = self.device_spec.to_string() @@ -97,6 +98,12 @@ class _EagerContext(threading.local): self.ones_rank_cache = _EagerTensorCache() self.zeros_cache = 
_EagerTensorCache() self.execution_mode = None + self.rewriter_config = None + if config is not None and config.HasField( + "graph_options") and config.graph_options.HasField("rewrite_options"): + self.rewriter_config = ( + config.graph_options.rewrite_options.SerializeToString()) + ContextSwitch = collections.namedtuple( @@ -191,7 +198,7 @@ class Context(object): Raises: ValueError: If execution_mode is not valid. """ - self._eager_context = _EagerContext() + self._eager_context = _EagerContext(config) self._context_switches = _ContextSwitchStack(self.executing_eagerly()) self._context_handle = None self._context_devices = None @@ -361,6 +368,36 @@ class Context(object): if mode == EAGER_MODE: self.context_switches.pop() + @tf_contextlib.contextmanager + def rewriter_config(self, rewriter_config_=None): + """A context manager to allow setting the grappler rewrite options. + + Args: + rewriter_config_: A tensorflow.RewriterConfig proto object. + + Yields: + Nothing. + + Raises: + ValueError: if rewriter_config is not a tensorflow.RewriterConfig proto. 
+ """ + if rewriter_config_ is None or not isinstance( + rewriter_config_, rewriter_config_pb2.RewriterConfig): + raise ValueError("Must pass a rewriter_config proto") + + ctx = self._eager_context + old_rewriter_config = ctx.rewriter_config + ctx.rewriter_config = rewriter_config_.SerializeToString() + try: + yield + finally: + ctx.rewriter_config = old_rewriter_config + + @property + def rewriter_config_string(self): + """Returns the serialized rewriter_config for the current thread.""" + return self._eager_context.rewriter_config + def executing_eagerly(self): """Returns True if current thread has eager executing enabled.""" return self._eager_context.is_eager @@ -783,6 +820,11 @@ def export_run_metadata(): return context().export_run_metadata() +def rewriter_config(rewriter_config_): + """Context manager for setting the grappler rewrite config.""" + return context().rewriter_config(rewriter_config_) + + def set_server_def(server_def): context().set_server_def(server_def) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index f1a63adce1..191279abae 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -501,7 +501,8 @@ class _EagerDefinedFunction(object): args=args, f=self, tout=self._output_types, - executing_eagerly=executing_eagerly) + executing_eagerly=executing_eagerly, + config=ctx.rewriter_config_string) # pylint: disable=protected-access if executing_eagerly: return outputs diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 953f4300cf..7b708622f1 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -95,6 +95,20 @@ class FunctionTest(test.TestCase): self.assertAllEqual(sq.numpy().reshape(-1), [10, 14, 14, 20]) self.assertAllEqual(sq2.numpy().reshape(-1), [52, 76, 74, 108]) + def testWastedAdd(self): + + @function.defun() + def add(x, y): + _ = x * y + return x + y + + # The 
default config allows everything. + rewrites = rewriter_config_pb2.RewriterConfig() + + with context.rewriter_config(rewrites): + t = constant_op.constant(1.0) + self.assertAllEqual(add(t, t).numpy(), 2.0) + def testBasicGraphMode(self): matmul = function.defun(math_ops.matmul) diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index 119d9522bd..f8a95f5e62 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -23,6 +23,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -979,8 +980,19 @@ def For(start, return ret # pylint: enable=invalid-name,protected-access +_rewriter_config_optimizer_disabled = None -def partitioned_call(args, f, tout=None, executing_eagerly=None): + +def _get_disabled_rewriter_config(): + global _rewriter_config_optimizer_disabled + if _rewriter_config_optimizer_disabled is None: + rewriter_config = rewriter_config_pb2.RewriterConfig() + rewriter_config.disable_meta_optimizer = True + _rewriter_config_optimizer_disabled = rewriter_config.SerializeToString() + return _rewriter_config_optimizer_disabled + + +def partitioned_call(args, f, tout=None, executing_eagerly=None, config=None): """Executes a function while respecting device annotations. Currently, only those functions that execute within the same address space @@ -994,6 +1006,9 @@ def partitioned_call(args, f, tout=None, executing_eagerly=None): the signature of `f`. executing_eagerly: (Optional) A boolean indicating whether the context is executing eagerly. If `None`, fetched from the global context. + config: (Optional) A tensorflow::RewriterConfig proto, serialized. If + `None`, all optimizations are disabled. 
Currently only handled for eager + defined functions. Returns: The list of `Tensor`s returned by invoking `f(args)`. If the function does @@ -1007,12 +1022,16 @@ def partitioned_call(args, f, tout=None, executing_eagerly=None): if executing_eagerly is None: executing_eagerly = context.executing_eagerly() + if config is None: + config = _get_disabled_rewriter_config() + if executing_eagerly or len(tout): if f.stateful_ops: outputs = gen_functional_ops.stateful_partitioned_call( - args=args, Tout=tout, f=f) + args=args, Tout=tout, f=f, config=config) else: - outputs = gen_functional_ops.partitioned_call(args=args, Tout=tout, f=f) + outputs = gen_functional_ops.partitioned_call( + args=args, Tout=tout, f=f, config=config) return outputs if outputs else None # The generated binding returns an empty list for functions that don't @@ -1026,6 +1045,11 @@ def partitioned_call(args, f, tout=None, executing_eagerly=None): func_attr = attr_value_pb2.AttrValue( func=attr_value_pb2.NameAttrList(name=f.name)) + # When running in graph mode, the graph and function graphs are optimized + # (i.e. run through grappler) per the session options, so we can disable any + # eager-specific rewriting. + rewriter_config = attr_value_pb2.AttrValue(s=_get_disabled_rewriter_config()) + graph = ops.get_default_graph() f.add_to_graph(graph) op_name = "StatefulPartitionedCall" if f.stateful_ops else "PartitionedCall" @@ -1035,6 +1059,11 @@ def partitioned_call(args, f, tout=None, executing_eagerly=None): tout, compute_shapes=False, name="PartitionedFunctionCall", - attrs={"Tin": tin_attr, "Tout": tout_attr, "f": func_attr}) + attrs={ + "Tin": tin_attr, + "Tout": tout_attr, + "f": func_attr, + "config": rewriter_config + }) outputs = op.outputs return outputs if outputs else op -- GitLab From 07ab34180b746f767614ccdafbb1a8e9bb7b1641 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 13:21:12 -0700 Subject: [PATCH 222/411] Update ops-related pbtxt files. 
PiperOrigin-RevId: 216581784 --- .../core/ops/compat/ops_history.v1.pbtxt | 65 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 14 ++++ 2 files changed, 79 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 415e15b720..713ca5a651 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -39601,6 +39601,38 @@ op { type: "func" } } +op { + name: "PartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } +} op { name: "Placeholder" output_arg { @@ -71002,6 +71034,39 @@ op { } is_stateful: true } +op { + name: "StatefulPartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} op { name: "StatelessIf" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index a8da95dea3..78f796fb7f 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -20050,6 +20050,13 @@ op { name: "f" type: "func" } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } } op { name: "Placeholder" @@ -33156,6 +33163,13 @@ op { name: "f" type: "func" } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } is_stateful: true } op { -- GitLab From 93ee9924edfe6d012d6df76b94c290d3d0974848 
Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 10 Oct 2018 13:21:43 -0700 Subject: [PATCH 223/411] patch the "read the toco guide" button in the index. PiperOrigin-RevId: 216581868 --- tensorflow/contrib/lite/g3doc/_index.yaml | 8 +++--- .../g3doc/tflite_convert/cmdline_examples.md | 14 +++++----- .../g3doc/tflite_convert/cmdline_reference.md | 2 +- .../lite/g3doc/tflite_convert/python_api.md | 26 +++++++++---------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/lite/g3doc/_index.yaml b/tensorflow/contrib/lite/g3doc/_index.yaml index bc66cc5dc1..eb32d3e94a 100644 --- a/tensorflow/contrib/lite/g3doc/_index.yaml +++ b/tensorflow/contrib/lite/g3doc/_index.yaml @@ -97,7 +97,7 @@ landing_page: path: https://www.shazam.com/ - custom_image: path: ./images/landing-page/nest_logo.png - path: https://nest.com/ + path: https://nest.com/ - custom_image: path: ./images/landing-page/loseit_logo.png path: https://www.loseit.com/ @@ -129,10 +129,10 @@ landing_page: icon_name: autorenew description: > Convert a TensorFlow model into a compressed flat buffer with the - TensorFlow Lite Optimizing Converter (TOCO). + TensorFlow Lite Converter. buttons: - - label: Read the TOCO guide - path: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/toco/g3doc/python_api.md + - label: Read the converter guide + path: /lite/tflite_convert/python_api classname: button button-primary tfo-button-primary - heading: Deploy icon: diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md index d88acfae80..ffb73b77b8 100644 --- a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md +++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md @@ -21,10 +21,10 @@ There are two approaches to running the converter in the command line. 
//tensorflow/contrib/lite/python:tflite_convert -- --output_file=...` -### Converting models prior to TensorFlow 1.9 +### Converting models prior to TensorFlow 1.9 The recommended approach for using the converter prior to TensorFlow 1.9 is the -[Python API](python_api.md#pre-tensorflow-1.9). If a command line tool is +[Python API](python_api.md#pre_tensorflow_1.9). If a command line tool is desired, the `toco` command line tool was available in TensorFlow 1.7. Enter `toco --help` in Terminal for additional details on the command-line flags available. There were no command line tools in TensorFlow 1.8. @@ -91,7 +91,7 @@ tflite_convert \ ## Quantization -### Convert a TensorFlow GraphDef for quantized inference +### Convert a TensorFlow GraphDef for quantized inference The TensorFlow Lite Converter is compatible with fixed point quantization models described [here](https://www.tensorflow.org/performance/quantization). These are @@ -115,7 +115,7 @@ tflite_convert \ --std_dev_values=127 ``` -### Use \"dummy-quantization\" to try out quantized inference on a float graph +### Use \"dummy-quantization\" to try out quantized inference on a float graph In order to evaluate the possible benefit of generating a quantized graph, the converter allows "dummy-quantization" on float graphs. The flags @@ -185,7 +185,7 @@ tflite_convert \ Any array in the input file can be specified as an input or output array in order to extract subgraphs out of an input graph file. The TensorFlow Lite Converter discards the parts of the graph outside of the specific subgraph. Use -[graph visualizations](#graph-visualizations) to identify the input and output +[graph visualizations](#graph_visualizations) to identify the input and output arrays that make up the desired subgraph. The follow command shows how to extract a single fused layer out of a TensorFlow @@ -228,7 +228,7 @@ visualization using either the `--output_format` flag or the `--dump_graphviz_dir` flag. 
The subsections below outline the use cases for each. -### Using `--output_format=GRAPHVIZ_DOT` +### Using `--output_format=GRAPHVIZ_DOT` The first way to get a Graphviz rendering is to pass `GRAPHVIZ_DOT` into `--output_format`. This results in a plausible visualization of the graph. This @@ -326,7 +326,7 @@ each individual graph transformation, resulting in thousands of files. Typically, one would then bisect into these files to understand when a given change was introduced in the graph. -### Legend for the graph visualizations +### Legend for the graph visualizations * Operators are red square boxes with the following hues of red: * Most operators are diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md index d65912fea6..eab26f5cb2 100644 --- a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md +++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md @@ -59,7 +59,7 @@ based on index. * `--input_shapes`. Type: colon-separated list of comma-separated lists of integers. Each comma-separated list of integers gives the shape of one of the input arrays specified in - [TensorFlow convention](https://www.tensorflow.org/guide/dims_types#shape). + [TensorFlow convention](https://www.tensorflow.org/guide/tensors#shape). * Example: `--input_shapes=1,60,80,3` for a typical vision model means a batch size of 1, an input image height of 60, an input image width of 80, and an input image depth of 3 (representing RGB channels). 
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md index e1c0e0c240..71a38c7bea 100644 --- a/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md +++ b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md @@ -1,7 +1,7 @@ # TensorFlow Lite Converter & Interpreter Python API reference This page provides examples on how to use the TensorFlow Lite Converter and the -TensorFlow Lite interpreter using the Python API. +TensorFlow Lite interpreter using the Python API. [TOC] @@ -21,8 +21,8 @@ is `tf.contrib.lite.Interpreter`. Note: Reference "Additional Instructions" sections for converting TensorFlow models to TensorFlow Lite -[in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11) and -[prior to TensorFlow 1.9](#pre-tensorflow-1.9) +[in TensorFlow 1.9 to TensorFlow 1.11](#pre_tensorflow_1.11) and +[prior to TensorFlow 1.9](#pre_tensorflow_1.9) `TFLiteConverter` provides class methods based on the original format of the model. `TFLiteConverter.from_session()` is available for GraphDefs. @@ -37,7 +37,7 @@ Example usages for simple float-point models are shown in The following section shows examples of how to convert a basic float-point model from each of the supported data formats into a TensorFlow Lite FlatBuffers. -### Exporting a GraphDef from tf.Session +### Exporting a GraphDef from tf.Session The following example shows how to convert a TensorFlow GraphDef into a TensorFlow Lite FlatBuffer from a `tf.Session` object. @@ -57,7 +57,7 @@ with tf.Session() as sess: open("converted_model.tflite", "wb").write(tflite_model) ``` -### Exporting a GraphDef from file +### Exporting a GraphDef from file The following example shows how to convert a TensorFlow GraphDef into a TensorFlow Lite FlatBuffer when the GraphDef is stored in a file. 
Both `.pb` and @@ -81,7 +81,7 @@ tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` -### Exporting a SavedModel +### Exporting a SavedModel The following example shows how to convert a SavedModel into a TensorFlow Lite FlatBuffer. @@ -99,7 +99,7 @@ For more complex SavedModels, the optional parameters that can be passed into `output_arrays`, `tag_set` and `signature_key`. Details of each parameter are available by running `help(tf.contrib.lite.TFLiteConverter)`. -### Exporting a tf.keras File +### Exporting a tf.keras File The following example shows how to convert a `tf.keras` model into a TensorFlow Lite FlatBuffer. This example requires @@ -156,7 +156,7 @@ terminal for detailed documentation on the attributes. Although the examples are demonstrated on GraphDefs containing only constants. The same logic can be applied irrespective of the input data format. -### Exporting a quantized GraphDef +### Exporting a quantized GraphDef The following example shows how to convert a quantized model into a TensorFlow Lite FlatBuffer. @@ -180,7 +180,7 @@ with tf.Session() as sess: ## TensorFlow Lite Python interpreter -### Using the interpreter from a model file +### Using the interpreter from a model file The following example shows how to use the TensorFlow Lite Python interpreter when provided a TensorFlow Lite FlatBuffer file. The example also demonstrates @@ -210,7 +210,7 @@ output_data = interpreter.get_tensor(output_details[0]['index']) print(output_data) ``` -### Using the interpreter from model data +### Using the interpreter from model data The following example shows how to use the TensorFlow Lite Python interpreter when starting with the TensorFlow Lite Flatbuffer model previously loaded. 
This @@ -237,7 +237,7 @@ interpreter.allocate_tensors() ## Additional instructions -### Build from source code +### Build from source code In order to run the latest version of the TensorFlow Lite Converter Python API, either install the nightly build with @@ -245,13 +245,13 @@ either install the nightly build with [Docker](https://www.tensorflow.org/install/docker), or [build the pip package from source](https://www.tensorflow.org/install/source). -### Converting models in TensorFlow 1.9 to TensorFlow 1.11 +### Converting models in TensorFlow 1.9 to TensorFlow 1.11 To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.9 through TensorFlow 1.11, use `TocoConverter`. `TocoConverter` is semantically identically to `TFLiteConverter`. -### Converting models prior to TensorFlow 1.9 +### Converting models prior to TensorFlow 1.9 To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.7 and TensorFlow 1.8, use the `toco_convert` function. Run `help(tf.contrib.lite.toco_convert)` -- GitLab From 95bffd3a923d7c433560d89aa63fdc36221e3a32 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 13:23:26 -0700 Subject: [PATCH 224/411] Don't use wildcard reshaping when dimensions can be unknown. PiperOrigin-RevId: 216582137 --- tensorflow/python/feature_column/feature_column.py | 5 ++++- tensorflow/python/feature_column/feature_column_v2.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 8a11ca142c..b1f47ebec2 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -3468,7 +3468,10 @@ class _SequenceCategoricalColumn( # combined during embedding lookup. If the tensor is already 3D, leave # as-is. 
shape = array_ops.shape(id_tensor) - target_shape = [shape[0], shape[1], -1] + # Compute the third dimension explicitly instead of setting it to -1, as + # that doesn't work for dynamically shaped tensors with 0-length at runtime. + # This happens for empty sequences. + target_shape = [shape[0], shape[1], math_ops.reduce_prod(shape[2:])] id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape) if weight_tensor is not None: weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape) diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index 6d089de991..67de174a67 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -4110,7 +4110,10 @@ class SequenceCategoricalColumn( # combined during embedding lookup. If the tensor is already 3D, leave # as-is. shape = array_ops.shape(id_tensor) - target_shape = [shape[0], shape[1], -1] + # Compute the third dimension explicitly instead of setting it to -1, as + # that doesn't work for dynamically shaped tensors with 0-length at runtime. + # This happens for empty sequences. + target_shape = [shape[0], shape[1], math_ops.reduce_prod(shape[2:])] id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape) if weight_tensor is not None: weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape) -- GitLab From 59b2b0392976f48f7eb8b50afa63f9f4da82af58 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 10 Oct 2018 13:23:36 -0700 Subject: [PATCH 225/411] Update renames_v2.py file. 
PiperOrigin-RevId: 216582173 --- tensorflow/tools/compatibility/renames_v2.py | 149 +++++++++++++----- .../tools/compatibility/tf_upgrade_v2_test.py | 17 +- .../update/generate_v2_renames_map.py | 1 + 3 files changed, 120 insertions(+), 47 deletions(-) diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py index 7e66ad816a..c3f20d0337 100644 --- a/tensorflow/tools/compatibility/renames_v2.py +++ b/tensorflow/tools/compatibility/renames_v2.py @@ -25,35 +25,53 @@ from __future__ import division from __future__ import print_function renames = { - 'tf.acos': 'tf.math.acos', - 'tf.acosh': 'tf.math.acosh', - 'tf.add': 'tf.math.add', - 'tf.as_string': 'tf.dtypes.as_string', - 'tf.asin': 'tf.math.asin', - 'tf.asinh': 'tf.math.asinh', - 'tf.atan': 'tf.math.atan', - 'tf.atan2': 'tf.math.atan2', - 'tf.atanh': 'tf.math.atanh', - 'tf.batch_to_space_nd': 'tf.manip.batch_to_space_nd', + 'tf.OpError': 'tf.errors.OpError', + 'tf.PaddingFIFOQueue': 'tf.io.PaddingFIFOQueue', + 'tf.PriorityQueue': 'tf.io.PriorityQueue', + 'tf.QueueBase': 'tf.io.QueueBase', + 'tf.RandomShuffleQueue': 'tf.io.RandomShuffleQueue', + 'tf.SparseConditionalAccumulator': 'tf.sparse.SparseConditionalAccumulator', + 'tf.accumulate_n': 'tf.math.accumulate_n', + 'tf.angle': 'tf.math.angle', + 'tf.assert_greater_equal': 'tf.debugging.assert_greater_equal', + 'tf.assert_integer': 'tf.debugging.assert_integer', + 'tf.assert_less_equal': 'tf.debugging.assert_less_equal', + 'tf.assert_near': 'tf.debugging.assert_near', + 'tf.assert_negative': 'tf.debugging.assert_negative', + 'tf.assert_non_negative': 'tf.debugging.assert_non_negative', + 'tf.assert_non_positive': 'tf.debugging.assert_non_positive', + 'tf.assert_none_equal': 'tf.debugging.assert_none_equal', + 'tf.assert_positive': 'tf.debugging.assert_positive', + 'tf.assert_proper_iterable': 'tf.debugging.assert_proper_iterable', + 'tf.assert_rank_at_least': 'tf.debugging.assert_rank_at_least', + 'tf.assert_rank_in': 
'tf.debugging.assert_rank_in', + 'tf.assert_same_float_dtype': 'tf.debugging.assert_same_float_dtype', + 'tf.assert_scalar': 'tf.debugging.assert_scalar', + 'tf.assert_type': 'tf.debugging.assert_type', 'tf.betainc': 'tf.math.betainc', + 'tf.bincount': 'tf.math.bincount', 'tf.ceil': 'tf.math.ceil', 'tf.check_numerics': 'tf.debugging.check_numerics', 'tf.cholesky': 'tf.linalg.cholesky', - 'tf.cos': 'tf.math.cos', - 'tf.cosh': 'tf.math.cosh', + 'tf.cholesky_solve': 'tf.linalg.cholesky_solve', + 'tf.confusion_matrix': 'tf.train.confusion_matrix', + 'tf.conj': 'tf.math.conj', 'tf.cross': 'tf.linalg.cross', + 'tf.cumprod': 'tf.math.cumprod', 'tf.decode_base64': 'tf.io.decode_base64', 'tf.decode_compressed': 'tf.io.decode_compressed', + 'tf.decode_csv': 'tf.io.decode_csv', 'tf.decode_json_example': 'tf.io.decode_json_example', 'tf.decode_raw': 'tf.io.decode_raw', + 'tf.depth_to_space': 'tf.nn.depth_to_space', 'tf.dequantize': 'tf.quantization.dequantize', + 'tf.deserialize_many_sparse': 'tf.io.deserialize_many_sparse', 'tf.diag': 'tf.linalg.tensor_diag', 'tf.diag_part': 'tf.linalg.tensor_diag_part', 'tf.digamma': 'tf.math.digamma', 'tf.encode_base64': 'tf.io.encode_base64', - 'tf.equal': 'tf.math.equal', + 'tf.erf': 'tf.math.erf', 'tf.erfc': 'tf.math.erfc', - 'tf.exp': 'tf.math.exp', 'tf.expm1': 'tf.math.expm1', 'tf.extract_image_patches': 'tf.image.extract_image_patches', 'tf.fake_quant_with_min_max_args': 'tf.quantization.fake_quant_with_min_max_args', @@ -63,26 +81,33 @@ renames = { 'tf.fake_quant_with_min_max_vars_per_channel': 'tf.quantization.fake_quant_with_min_max_vars_per_channel', 'tf.fake_quant_with_min_max_vars_per_channel_gradient': 'tf.quantization.fake_quant_with_min_max_vars_per_channel_gradient', 'tf.fft': 'tf.spectral.fft', - 'tf.floor': 'tf.math.floor', - 'tf.gather_nd': 'tf.manip.gather_nd', - 'tf.GraphKeys.VARIABLES': 'tf.GraphKeys.GLOBAL_VARIABLES', - 'tf.greater': 'tf.math.greater', - 'tf.greater_equal': 'tf.math.greater_equal', + 'tf.floordiv': 
'tf.math.floordiv', + 'tf.get_seed': 'tf.random.get_seed', + 'tf.global_norm': 'tf.linalg.global_norm', + 'tf.glorot_normal_initializer': 'tf.keras.initializers.glorot_normal', 'tf.ifft': 'tf.spectral.ifft', 'tf.igamma': 'tf.math.igamma', 'tf.igammac': 'tf.math.igammac', + 'tf.imag': 'tf.math.imag', 'tf.invert_permutation': 'tf.math.invert_permutation', 'tf.is_finite': 'tf.debugging.is_finite', 'tf.is_inf': 'tf.debugging.is_inf', 'tf.is_nan': 'tf.debugging.is_nan', - 'tf.less': 'tf.math.less', - 'tf.less_equal': 'tf.math.less_equal', + 'tf.is_non_decreasing': 'tf.debugging.is_non_decreasing', + 'tf.is_numeric_tensor': 'tf.debugging.is_numeric_tensor', + 'tf.is_strictly_increasing': 'tf.debugging.is_strictly_increasing', + 'tf.lbeta': 'tf.math.lbeta', 'tf.lgamma': 'tf.math.lgamma', - 'tf.log': 'tf.math.log', - 'tf.log1p': 'tf.math.log1p', - 'tf.logical_and': 'tf.math.logical_and', - 'tf.logical_not': 'tf.math.logical_not', - 'tf.logical_or': 'tf.math.logical_or', + 'tf.log_sigmoid': 'tf.math.log_sigmoid', + 'tf.logical_xor': 'tf.math.logical_xor', + 'tf.manip.batch_to_space_nd': 'tf.batch_to_space_nd', + 'tf.manip.gather_nd': 'tf.gather_nd', + 'tf.manip.reshape': 'tf.reshape', + 'tf.manip.reverse': 'tf.reverse', + 'tf.manip.roll': 'tf.roll', + 'tf.manip.scatter_nd': 'tf.scatter_nd', + 'tf.manip.space_to_batch_nd': 'tf.space_to_batch_nd', + 'tf.manip.tile': 'tf.tile', 'tf.matching_files': 'tf.io.matching_files', 'tf.matrix_band_part': 'tf.linalg.band_part', 'tf.matrix_determinant': 'tf.linalg.det', @@ -91,31 +116,72 @@ renames = { 'tf.matrix_inverse': 'tf.linalg.inv', 'tf.matrix_set_diag': 'tf.linalg.set_diag', 'tf.matrix_solve': 'tf.linalg.solve', + 'tf.matrix_solve_ls': 'tf.linalg.lstsq', + 'tf.matrix_transpose': 'tf.linalg.transpose', 'tf.matrix_triangular_solve': 'tf.linalg.triangular_solve', - 'tf.maximum': 'tf.math.maximum', - 'tf.minimum': 'tf.math.minimum', - 'tf.not_equal': 'tf.math.not_equal', + 'tf.nn.log_uniform_candidate_sampler': 
'tf.random.log_uniform_candidate_sampler', + 'tf.nn.uniform_candidate_sampler': 'tf.random.uniform_candidate_sampler', + 'tf.orthogonal_initializer': 'tf.keras.initializers.Orthogonal', 'tf.parse_tensor': 'tf.io.parse_tensor', 'tf.polygamma': 'tf.math.polygamma', + 'tf.python_io.TFRecordCompressionType': 'tf.io.TFRecordCompressionType', + 'tf.python_io.TFRecordOptions': 'tf.io.TFRecordOptions', + 'tf.python_io.TFRecordWriter': 'tf.io.TFRecordWriter', + 'tf.python_io.tf_record_iterator': 'tf.io.tf_record_iterator', 'tf.qr': 'tf.linalg.qr', + 'tf.quantize': 'tf.quantization.quantize', 'tf.quantized_concat': 'tf.quantization.quantized_concat', + 'tf.random_gamma': 'tf.random.gamma', + 'tf.random_poisson': 'tf.random.poisson', 'tf.read_file': 'tf.io.read_file', + 'tf.real': 'tf.math.real', 'tf.reciprocal': 'tf.math.reciprocal', + 'tf.reduce_join': 'tf.strings.reduce_join', 'tf.regex_replace': 'tf.strings.regex_replace', - 'tf.reshape': 'tf.manip.reshape', - 'tf.reverse': 'tf.manip.reverse', - 'tf.reverse_v2': 'tf.manip.reverse', + 'tf.reverse_v2': 'tf.reverse', 'tf.rint': 'tf.math.rint', 'tf.rsqrt': 'tf.math.rsqrt', - 'tf.scatter_nd': 'tf.manip.scatter_nd', + 'tf.saved_model.builder.SavedModelBuilder': 'tf.saved_model.Builder', + 'tf.saved_model.loader.maybe_saved_model_directory': 'tf.saved_model.maybe_saved_model_directory', + 'tf.saved_model.main_op.main_op_with_restore': 'tf.saved_model.main_op_with_restore', + 'tf.saved_model.signature_def_utils.build_signature_def': 'tf.saved_model.build_signature_def', + 'tf.saved_model.signature_def_utils.classification_signature_def': 'tf.saved_model.classification_signature_def', + 'tf.saved_model.signature_def_utils.is_valid_signature': 'tf.saved_model.is_valid_signature', + 'tf.saved_model.signature_def_utils.predict_signature_def': 'tf.saved_model.predict_signature_def', + 'tf.saved_model.signature_def_utils.regression_signature_def': 'tf.saved_model.regression_signature_def', + 'tf.saved_model.utils.build_tensor_info': 
'tf.saved_model.build_tensor_info', + 'tf.saved_model.utils.get_tensor_from_tensor_info': 'tf.saved_model.get_tensor_from_tensor_info', 'tf.segment_max': 'tf.math.segment_max', 'tf.segment_mean': 'tf.math.segment_mean', 'tf.segment_min': 'tf.math.segment_min', 'tf.segment_prod': 'tf.math.segment_prod', 'tf.segment_sum': 'tf.math.segment_sum', - 'tf.sin': 'tf.math.sin', - 'tf.sinh': 'tf.math.sinh', - 'tf.space_to_batch_nd': 'tf.manip.space_to_batch_nd', + 'tf.self_adjoint_eig': 'tf.linalg.eigh', + 'tf.self_adjoint_eigvals': 'tf.linalg.eigvalsh', + 'tf.serialize_many_sparse': 'tf.io.serialize_many_sparse', + 'tf.serialize_sparse': 'tf.io.serialize_sparse', + 'tf.space_to_batch': 'tf.nn.space_to_batch', + 'tf.space_to_depth': 'tf.nn.space_to_depth', + 'tf.sparse_add': 'tf.sparse.add', + 'tf.sparse_fill_empty_rows': 'tf.sparse.fill_empty_rows', + 'tf.sparse_mask': 'tf.sparse.mask', + 'tf.sparse_maximum': 'tf.sparse.maximum', + 'tf.sparse_merge': 'tf.sparse.merge', + 'tf.sparse_minimum': 'tf.sparse.minimum', + 'tf.sparse_placeholder': 'tf.sparse.placeholder', + 'tf.sparse_reorder': 'tf.sparse.reorder', + 'tf.sparse_reset_shape': 'tf.sparse.reset_shape', + 'tf.sparse_reshape': 'tf.sparse.reshape', + 'tf.sparse_retain': 'tf.sparse.retain', + 'tf.sparse_segment_mean': 'tf.sparse.segment_mean', + 'tf.sparse_segment_sqrt_n': 'tf.sparse.segment_sqrt_n', + 'tf.sparse_segment_sum': 'tf.sparse.segment_sum', + 'tf.sparse_slice': 'tf.sparse.slice', + 'tf.sparse_softmax': 'tf.sparse.softmax', + 'tf.sparse_tensor_dense_matmul': 'tf.sparse.matmul', + 'tf.sparse_tensor_to_dense': 'tf.sparse.to_dense', + 'tf.sparse_to_indicator': 'tf.sparse.to_indicator', + 'tf.sparse_transpose': 'tf.sparse.transpose', 'tf.squared_difference': 'tf.math.squared_difference', 'tf.string_join': 'tf.strings.join', 'tf.string_strip': 'tf.strings.strip', @@ -123,13 +189,18 @@ renames = { 'tf.string_to_hash_bucket_fast': 'tf.strings.to_hash_bucket_fast', 'tf.string_to_hash_bucket_strong': 
'tf.strings.to_hash_bucket_strong', 'tf.string_to_number': 'tf.strings.to_number', - 'tf.substr': 'tf.strings.substr', - 'tf.tan': 'tf.math.tan', - 'tf.tile': 'tf.manip.tile', + 'tf.svd': 'tf.linalg.svd', + 'tf.trace': 'tf.linalg.trace', + 'tf.train.match_filenames_once': 'tf.io.match_filenames_once', + 'tf.uniform_unit_scaling_initializer': 'tf.initializers.uniform_unit_scaling', 'tf.unsorted_segment_max': 'tf.math.unsorted_segment_max', + 'tf.unsorted_segment_mean': 'tf.math.unsorted_segment_mean', 'tf.unsorted_segment_min': 'tf.math.unsorted_segment_min', 'tf.unsorted_segment_prod': 'tf.math.unsorted_segment_prod', + 'tf.unsorted_segment_sqrt_n': 'tf.math.unsorted_segment_sqrt_n', 'tf.unsorted_segment_sum': 'tf.math.unsorted_segment_sum', + 'tf.variance_scaling_initializer': 'tf.keras.initializers.VarianceScaling', + 'tf.verify_tensor_all_finite': 'tf.debugging.assert_all_finite', 'tf.write_file': 'tf.io.write_file', 'tf.zeta': 'tf.math.zeta' } diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py index 3886c1e8b9..f606d202a6 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py @@ -49,19 +49,20 @@ class TestUpgrade(test_util.TensorFlowTestCase): self.assertTrue(report.find("Failed to parse") != -1) def testReport(self): - text = "tf.acos(a)\n" + text = "tf.assert_near(a)\n" _, report, unused_errors, unused_new_text = self._upgrade(text) # This is not a complete test, but it is a sanity test that a report # is generating information. 
- self.assertTrue(report.find("Renamed function `tf.acos` to `tf.math.acos`")) + self.assertTrue(report.find("Renamed function `tf.assert_near` to " + "`tf.debugging.assert_near`")) def testRename(self): - text = "tf.acos(a)\n" + text = "tf.conj(a)\n" _, unused_report, unused_errors, new_text = self._upgrade(text) - self.assertEqual(new_text, "tf.math.acos(a)\n") - text = "tf.rsqrt(tf.log(3.8))\n" + self.assertEqual(new_text, "tf.math.conj(a)\n") + text = "tf.rsqrt(tf.log_sigmoid(3.8))\n" _, unused_report, unused_errors, new_text = self._upgrade(text) - self.assertEqual(new_text, "tf.math.rsqrt(tf.math.log(3.8))\n") + self.assertEqual(new_text, "tf.math.rsqrt(tf.math.log_sigmoid(3.8))\n") def testLearningRateDecay(self): for decay in ["tf.train.exponential_decay", "tf.train.piecewise_constant", @@ -82,8 +83,8 @@ class TestUpgradeFiles(test_util.TensorFlowTestCase): def testInplace(self): """Check to make sure we don't have a file system race.""" temp_file = tempfile.NamedTemporaryFile("w", delete=False) - original = "tf.acos(a, b)\n" - upgraded = "tf.math.acos(a, b)\n" + original = "tf.conj(a)\n" + upgraded = "tf.math.conj(a)\n" temp_file.write(original) temp_file.close() upgrader = ast_edits.ASTCodeUpgrader(tf_upgrade_v2.TFAPIChangeSpec()) diff --git a/tensorflow/tools/compatibility/update/generate_v2_renames_map.py b/tensorflow/tools/compatibility/update/generate_v2_renames_map.py index 567eceb0b6..7d6beca358 100644 --- a/tensorflow/tools/compatibility/update/generate_v2_renames_map.py +++ b/tensorflow/tools/compatibility/update/generate_v2_renames_map.py @@ -88,6 +88,7 @@ def update_renames_v2(output_file_path): visitor = public_api.PublicAPIVisitor(visit) visitor.do_not_descend_map['tf'].append('contrib') + visitor.do_not_descend_map['tf.compat'] = ['v1', 'v2'] traverse.traverse(tf, visitor) renames_file_text = '%srenames = {\n%s\n}\n' % ( -- GitLab From 2ed698735d465872e16cb6b292bd747ca431c083 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 10 Oct 
2018 13:31:12 -0700 Subject: [PATCH 226/411] Support lowering of nested If and While ops. Removes the LowerIfOpPass and LowerWhileOpPass and adds a unified LowerIfWhilePass. This change is required because the PRE_PLACEMENT graph optimization pass(https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/common_runtime/graph_execution_state.cc#L531) is run only once and hence can not handle nesting. Enable the test for nested cond and while in control_flow_ops_py_test. PiperOrigin-RevId: 216583372 --- tensorflow/core/BUILD | 25 ++ tensorflow/core/common_runtime/lower_if_op.cc | 47 +-- tensorflow/core/common_runtime/lower_if_op.h | 14 +- .../core/common_runtime/lower_if_op_test.cc | 9 +- .../core/common_runtime/lower_if_while.cc | 95 +++++ .../core/common_runtime/lower_if_while.h | 38 ++ .../common_runtime/lower_if_while_test.cc | 337 ++++++++++++++++++ .../core/common_runtime/lower_while_op.cc | 61 +--- .../core/common_runtime/lower_while_op.h | 14 +- .../common_runtime/lower_while_op_test.cc | 19 +- .../kernel_tests/control_flow_ops_py_test.py | 11 +- 11 files changed, 533 insertions(+), 137 deletions(-) create mode 100644 tensorflow/core/common_runtime/lower_if_while.cc create mode 100644 tensorflow/core/common_runtime/lower_if_while.h create mode 100644 tensorflow/core/common_runtime/lower_if_while_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 9e7806342a..25651252a7 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2823,6 +2823,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/graph_optimizer.h", "common_runtime/local_device.h", "common_runtime/lower_if_op.h", + "common_runtime/lower_if_while.h", "common_runtime/lower_while_op.h", "common_runtime/memory_types.h", "common_runtime/mkl_cpu_allocator.h", @@ -2878,6 +2879,7 @@ tf_cuda_library( "common_runtime/hierarchical_tree_broadcaster.cc", "common_runtime/local_device.cc", "common_runtime/lower_if_op.cc", + "common_runtime/lower_if_while.cc", 
"common_runtime/lower_while_op.cc", "common_runtime/memory_types.cc", "common_runtime/mkl_cpu_allocator.cc", @@ -4723,6 +4725,29 @@ tf_cc_tests( ], ) +tf_cc_tests( + name = "common_runtime_lower_if_while_test", + size = "small", + srcs = ["common_runtime/lower_if_while_test.cc"], + deps = [ + ":all_kernels", + ":core_cpu", + ":core_cpu_internal", + ":direct_session", + ":framework", + ":framework_internal", + ":lib", + ":test", + ":test_main", + ":testlib", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", + "//tensorflow/cc:client_session", + "//tensorflow/cc:function_ops", + "//tensorflow/cc:ops", + ], +) + # Test data filegroup( name = "image_testdata", diff --git a/tensorflow/core/common_runtime/lower_if_op.cc b/tensorflow/core/common_runtime/lower_if_op.cc index 9306386117..44a2478e3f 100644 --- a/tensorflow/core/common_runtime/lower_if_op.cc +++ b/tensorflow/core/common_runtime/lower_if_op.cc @@ -22,10 +22,6 @@ limitations under the License. namespace tensorflow { -// TODO(jpienaar): Consider making it a public attribute. -const char* const LowerIfOpPass::kLowerUsingSwitchMergeAttr = - "_lower_using_switch_merge"; - namespace { using NodeOut = NodeBuilder::NodeOut; @@ -237,45 +233,7 @@ Status CondBuilder::InlineCallNodes() { } // namespace -Status LowerIfOpPass::Run(const GraphOptimizationPassOptions& options) { - if (options.partition_graphs != nullptr) { - return errors::Internal( - "Lowering If op should happen before partitioning."); - } - if (options.graph == nullptr) { - return Status::OK(); - } - - Graph* g = options.graph->get(); - if (g == nullptr) { - return errors::Internal("Lowering If op requires a graph to be available."); - } - - FunctionLibraryDefinition* flib = options.flib_def; - if (flib == nullptr) { - return errors::Internal( - "Lowering If op requires a FunctionLibraryDefinition to be available."); - } - - // Match all the nodes that need to be rewritten. 
- gtl::InlinedVector matches; - for (Node* n : g->op_nodes()) { - if (n->type_string() == "If") { - // Only rewrite if the If op is marked as needing to be lowered. - bool match; - Status s = GetNodeAttr(n->attrs(), kLowerUsingSwitchMergeAttr, &match); - if (s.ok() && match) matches.push_back(n); - } - } - for (Node* n : matches) { - TF_RETURN_IF_ERROR(RewriteNode(n, *flib, g)); - } - return Status::OK(); -} - -Status LowerIfOpPass::RewriteNode(Node* n, - const FunctionLibraryDefinition& flib, - Graph* g) { +Status RewriteIfNode(Node* n, Graph* g, const FunctionLibraryDefinition& flib) { const AttrValue* then_attr = n->attrs().Find("then_branch"); if (then_attr == nullptr) { return errors::InvalidArgument("Then branch function missing"); @@ -296,7 +254,4 @@ Status LowerIfOpPass::RewriteNode(Node* n, return Status::OK(); } -REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0, - LowerIfOpPass); - } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/lower_if_op.h b/tensorflow/core/common_runtime/lower_if_op.h index 5ab1123e3f..fc52e597be 100644 --- a/tensorflow/core/common_runtime/lower_if_op.h +++ b/tensorflow/core/common_runtime/lower_if_op.h @@ -21,18 +21,8 @@ limitations under the License. namespace tensorflow { -// Rewrite If ops to use switch and merge nodes instead. -class LowerIfOpPass : public GraphOptimizationPass { - public: - static const char* const kLowerUsingSwitchMergeAttr; - - Status Run(const GraphOptimizationPassOptions& options) override; - - private: - // Rewrite the given If node `n` in graph `g` to use the switch-merge - // form. `flib` should contain the branch functions referenced by `n`. - Status RewriteNode(Node* n, const FunctionLibraryDefinition& flib, Graph* g); -}; +// Replaces If node `n` with its lowered form that uses Switch and Merge nodes. 
+Status RewriteIfNode(Node* n, Graph* g, const FunctionLibraryDefinition& flib); } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/lower_if_op_test.cc b/tensorflow/core/common_runtime/lower_if_op_test.cc index 044a355d06..5765e3e367 100644 --- a/tensorflow/core/common_runtime/lower_if_op_test.cc +++ b/tensorflow/core/common_runtime/lower_if_op_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/common_runtime/lower_if_op.h" +#include "tensorflow/core/common_runtime/lower_if_while.h" #include "tensorflow/cc/client/client_session.h" #include "tensorflow/cc/framework/ops.h" @@ -40,7 +40,7 @@ Status Rewrite(std::unique_ptr* graph) { GraphOptimizationPassOptions opt_options; opt_options.graph = graph; opt_options.flib_def = &flib_def; - LowerIfOpPass pass; + LowerIfWhilePass pass; return pass.Run(opt_options); } @@ -51,7 +51,6 @@ TEST(LowerIfOpTest, Simple) { FunctionDefLibrary f_lib_proto; *(f_lib_proto.add_function()) = test::function::XTimesTwo(); *(f_lib_proto.add_function()) = test::function::XTimesFour(); - FunctionLibraryDefinition f_lib(OpRegistry::Global(), f_lib_proto); // Construct simple conditional that switches on `pred` and operates only on // single input `A`. 
@@ -65,12 +64,12 @@ TEST(LowerIfOpTest, Simple) { tb.mutable_func()->set_name("XTimesTwo"); AttrValue eb; eb.mutable_func()->set_name("XTimesFour"); - TF_ASSERT_OK(NodeBuilder("if", "If", &f_lib) + TF_ASSERT_OK(NodeBuilder("if", "If", &root.graph()->flib_def()) .Input(pred.node()) .Input(inputs) .Attr("then_branch", tb) .Attr("else_branch", eb) - .Attr(LowerIfOpPass::kLowerUsingSwitchMergeAttr, true) + .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true) .Attr("Tout", {DT_INT32}) .Finalize(root.graph(), &written_if)); TF_ASSERT_OK(root.DoShapeInference(written_if)); diff --git a/tensorflow/core/common_runtime/lower_if_while.cc b/tensorflow/core/common_runtime/lower_if_while.cc new file mode 100644 index 0000000000..c1db575b04 --- /dev/null +++ b/tensorflow/core/common_runtime/lower_if_while.cc @@ -0,0 +1,95 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/common_runtime/lower_if_while.h" +#include "tensorflow/core/common_runtime/lower_if_op.h" +#include "tensorflow/core/common_runtime/lower_while_op.h" + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" + +namespace tensorflow { + +#if defined(_MSC_VER) +constexpr char* LowerIfWhilePass::kLowerUsingSwitchMergeAttr; +#else +constexpr char LowerIfWhilePass::kLowerUsingSwitchMergeAttr[]; +#endif + +namespace { + +bool HasLoweringAttr(const AttrSlice& attrs) { + bool match; + Status s = + GetNodeAttr(attrs, LowerIfWhilePass::kLowerUsingSwitchMergeAttr, &match); + return s.ok() && match; +} + +} // namespace + +Status LowerIfWhilePass::Run(const GraphOptimizationPassOptions& options) { + if (options.partition_graphs != nullptr) { + return errors::Internal( + "Lowering If/While ops should happen before partitioning."); + } + if (options.graph == nullptr) { + return Status::OK(); + } + + Graph* g = options.graph->get(); + if (g == nullptr) { + return errors::Internal( + "Lowering While op requires a graph to be available."); + } + + FunctionLibraryDefinition* flib = options.flib_def; + if (flib == nullptr) { + return errors::Internal( + "Lowering If op requires a FunctionLibraryDefinition to be available."); + } + + // Lower all If and While ops that have the `kLowerUsingSwitchMergeAttr` attr + // set. + // We start at `i` = 2 to skip the source and sink nodes. + // Note that `g->num_node_ids()` may change in the for body if a matching If + // or While node is lowered. Since new graph nodes are always added to the + // end of the list of nodes it is ensured that nested If/While nodes will be + // lowered as well. 
+ for (int i = 2; i < g->num_node_ids(); ++i) { + Node* n = g->FindNodeId(i); + if (n == nullptr) continue; // deleted node + if (HasLoweringAttr(n->attrs())) { + if (n->type_string() == "If") { + TF_RETURN_IF_ERROR(RewriteIfNode(n, g, *flib)); + } else if (n->type_string() == "While") { + TF_RETURN_IF_ERROR(RewriteWhileNode(n, g, *flib)); + } else { + return errors::Internal( + "Node:", n->name(), " of type ", n->type_string(), " has '", + LowerIfWhilePass::kLowerUsingSwitchMergeAttr, + "' attr set but it does not support lowering.\n", n->DebugString()); + } + } + } + + return Status::OK(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0, + LowerIfWhilePass); + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/lower_if_while.h b/tensorflow/core/common_runtime/lower_if_while.h new file mode 100644 index 0000000000..efa3945bca --- /dev/null +++ b/tensorflow/core/common_runtime/lower_if_while.h @@ -0,0 +1,38 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_LOWER_IF_WHILE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_LOWER_IF_WHILE_H_ + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +// Rewrite If and While ops to use lower level control flow primitives instead. 
+class LowerIfWhilePass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; +#if defined(_MSC_VER) + static constexpr char* kLowerUsingSwitchMergeAttr = +#else + static constexpr char kLowerUsingSwitchMergeAttr[] = +#endif + "_lower_using_switch_merge"; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_LOWER_IF_WHILE_H_ diff --git a/tensorflow/core/common_runtime/lower_if_while_test.cc b/tensorflow/core/common_runtime/lower_if_while_test.cc new file mode 100644 index 0000000000..07bcecf168 --- /dev/null +++ b/tensorflow/core/common_runtime/lower_if_while_test.cc @@ -0,0 +1,337 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/common_runtime/lower_if_while.h" + +#include "tensorflow/cc/client/client_session.h" +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/control_flow_ops_internal.h" +#include "tensorflow/cc/ops/function_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/common_runtime/graph_runner.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/graph/graph_def_builder_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +typedef FunctionDefHelper FDH; + +static void AssertHasSubstr(StringPiece s, StringPiece expected) { + ASSERT_TRUE(str_util::StrContains(s, expected)) + << "'" << s << "' does not contain '" << expected << "'"; +} + +Status Rewrite(std::unique_ptr* graph) { + FunctionLibraryDefinition flib_def((*graph)->flib_def()); + GraphOptimizationPassOptions opt_options; + opt_options.graph = graph; + opt_options.flib_def = &flib_def; + LowerIfWhilePass pass; + return pass.Run(opt_options); +} + +// (counter:int32, pred:bool, x:int32) -> counter < N +FunctionDef WhileWithIfCond(int32 N) { + const Tensor kN = test::AsScalar(N); + return FDH::Define( + // Name + "WhileWithIfCond", + // Args + {"counter: int32", "pred: bool", "x: int32"}, + // Return values + {"z: bool"}, + // Attr def + {}, + // Nodes + { + {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT32}}}, + {{"z"}, "Less", {"counter", "N"}, {{"T", DT_INT32}}}, + }); +} + +// 
(counter:int32, pred:bool, x:int32) -> +// (counter+1, pred, if pred: x * 2 else: x * 4) +FunctionDef WhileWithIfBody() { + NameAttrList then_func; + then_func.set_name("XTimesTwo"); + NameAttrList else_func; + else_func.set_name("XTimesFour"); + const Tensor kOne = test::AsScalar(1); + std::vector input_types = {DT_INT32}; + std::vector output_types = {DT_INT32}; + return FDH::Define( + // Name + "WhileWithIfBody", + // Args + {"counter: int32", "pred: bool", "x: int32"}, + // Return values + {"updated_counter: int32", "pred: bool", "if: int32"}, + // Attr def + {}, + // Nodes + { + {{"if"}, + "If", + {"pred", "x"}, + {{"then_branch", then_func}, + {"else_branch", else_func}, + {"Tcond", DT_BOOL}, + {"Tin", input_types}, + {"Tout", output_types}, + {LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true}}}, + {{"one"}, "Const", {}, {{"value", kOne}, {"dtype", DT_INT32}}}, + {{"updated_counter"}, "Add", {"counter", "one"}, {{"T", DT_INT32}}}, + }); +} + +TEST(LowerIfWhileTest, CondInWhile) { + // Tests the value of `a` for different values of args after the following + // program: + // + // Args: + // counter = Arg(type = int32) + // pred = Arg(type = bool) + // a = Arg(type = int32) + // N = 3 + // while (counter < N) { + // counter += 1; + // if (pred) { + // a *= 2; + // } else { + // a *= 4; + // } + // } + + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + FunctionDefLibrary f_lib_proto; + // Cond functions. + *f_lib_proto.add_function() = test::function::XTimesTwo(); + *f_lib_proto.add_function() = test::function::XTimesFour(); + // While functions. 
+ *f_lib_proto.add_function() = WhileWithIfCond(3); + *f_lib_proto.add_function() = WhileWithIfBody(); + + Scope root = Scope::NewRootScope().ExitOnError(); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto)); + auto counter = ops::_Arg(root.WithOpName("counter"), DT_INT32, 0); + auto pred = ops::_Arg(root.WithOpName("pred"), DT_BOOL, 0); + auto a = ops::_Arg(root.WithOpName("A"), DT_INT32, 0); + std::vector inputs( + {NodeBuilder::NodeOut(counter.node()), NodeBuilder::NodeOut(pred.node()), + NodeBuilder::NodeOut(a.node())}); + Node* while_node; + AttrValue cond_func; + cond_func.mutable_func()->set_name("WhileWithIfCond"); + AttrValue body_func; + body_func.mutable_func()->set_name("WhileWithIfBody"); + TF_ASSERT_OK(NodeBuilder("while", "While", &root.graph()->flib_def()) + .Input(inputs) + .Attr("T", {DT_INT32, DT_BOOL, DT_INT32}) + .Attr("cond", cond_func) + .Attr("body", body_func) + .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true) + .Finalize(root.graph(), &while_node)); + TF_ASSERT_OK(root.DoShapeInference(while_node)); + TF_ASSERT_OK(root.ToGraph(graph.get())); + TF_ASSERT_OK(Rewrite(&graph)); + + // Lowered graph has no While and If ops. + for (const auto* op : graph->op_nodes()) { + ASSERT_NE(op->type_string(), "While"); + ASSERT_NE(op->type_string(), "If"); + } + + // Verify execution. 
+ ClientSession session(root); + { + ClientSession::FeedType feeds; + feeds.emplace(Output(counter.node()), Input::Initializer(0)); + feeds.emplace(Output(pred.node()), Input::Initializer(true)); + feeds.emplace(Output(a.node()), Input::Initializer(1)); + std::vector out_tensors; + TF_ASSERT_OK(session.Run(feeds, {Output(while_node, 2)}, &out_tensors)); + ASSERT_EQ(out_tensors.size(), 1); + EXPECT_EQ(out_tensors[0].scalar()(), 8); + } + { + ClientSession::FeedType feeds; + feeds.emplace(Output(counter.node()), Input::Initializer(0)); + feeds.emplace(Output(pred.node()), Input::Initializer(false)); + feeds.emplace(Output(a.node()), Input::Initializer(1)); + std::vector out_tensors; + TF_ASSERT_OK(session.Run(feeds, {Output(while_node, 2)}, &out_tensors)); + ASSERT_EQ(out_tensors.size(), 1); + EXPECT_EQ(out_tensors[0].scalar()(), 64); // a + } +} + +// x:int32 -> +// while x <= N: +// x*=2; +// return x; +FunctionDef IfWithWhileThen() { + NameAttrList cond_func; + cond_func.set_name("LessThanOrEqualToN"); + NameAttrList body_func; + body_func.set_name("XTimesTwo"); + std::vector input_and_output_types = {DT_INT32}; + std::vector output_shapes = {TensorShape()}; + return FDH::Define( + // Name + "IfWithWhileThen", + // Args + {"x: int32"}, + // Return values + {"while: int32"}, + // Attr def + {}, + // Nodes + { + {{"while"}, + "While", + {"x"}, + {{"cond", cond_func}, + {"body", body_func}, + {"T", input_and_output_types}, + {"output_shapes", output_shapes}, + {LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true}}}, + }); +} + +TEST(LowerIfWhileTest, WhileInCond) { + // Tests the value of `a` for different values of args after the following + // program: + // + // Args: + // pred = Arg(type = bool) + // a = Arg(type = int32) + // N = 8 + // if (pred) { + // while (a <= N) { + // a *= 2; + // } + // } + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + // Add test functions for cond and body. 
+ FunctionDefLibrary f_lib_proto; + *f_lib_proto.add_function() = test::function::XTimesTwo(); + *f_lib_proto.add_function() = test::function::LessThanOrEqualToN(8); + *f_lib_proto.add_function() = IfWithWhileThen(); + + Scope root = Scope::NewRootScope().ExitOnError(); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto)); + auto pred = ops::_Arg(root.WithOpName("pred"), DT_BOOL, 0); + auto a = ops::_Arg(root.WithOpName("A"), DT_INT32, 1); + std::vector inputs({NodeBuilder::NodeOut(a.node())}); + AttrValue then_func; + then_func.mutable_func()->set_name("IfWithWhileThen"); + AttrValue else_func; + else_func.mutable_func()->set_name("XTimesTwo"); + Node* if_node; + TF_ASSERT_OK(NodeBuilder("if", "If", &root.graph()->flib_def()) + .Input(pred.node()) + .Input(inputs) + .Attr("then_branch", then_func) + .Attr("else_branch", else_func) + .Attr("Tout", {DT_INT32}) + .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true) + .Finalize(root.graph(), &if_node)); + TF_ASSERT_OK(root.DoShapeInference(if_node)); + TF_ASSERT_OK(root.ToGraph(graph.get())); + + // The input graph has no lower level control flow primitives. + int node_called_if_count = 0; + for (const auto* op : graph->op_nodes()) { + ASSERT_FALSE(op->IsEnter()); + ASSERT_FALSE(op->IsExit()); + ASSERT_FALSE(op->IsSwitch()); + ASSERT_FALSE(op->IsMerge()); + ASSERT_FALSE(op->IsNextIteration()); + ASSERT_FALSE(op->IsLoopCond()); + if (op->name() == "if") { + node_called_if_count++; + } + } + ASSERT_EQ(node_called_if_count, 1); + + TF_ASSERT_OK(Rewrite(&graph)); + + node_called_if_count = 0; + for (const auto* op : graph->op_nodes()) { + if (op->name() == "if") { + node_called_if_count++; + } + ASSERT_NE(op->type_string(), "While"); + ASSERT_NE(op->type_string(), "If"); + } + // One node per loop input. + ASSERT_EQ(node_called_if_count, 1); + + // Verify execution. 
+ ClientSession session(root); + { + ClientSession::FeedType feeds; + feeds.emplace(Output(pred.node()), Input::Initializer(true)); + feeds.emplace(Output(a.node()), Input::Initializer(1)); + std::vector out_tensors; + TF_ASSERT_OK(session.Run(feeds, {Output(if_node)}, &out_tensors)); + ASSERT_EQ(out_tensors.size(), 1); + EXPECT_EQ(out_tensors[0].scalar()(), 16); + } + { + ClientSession::FeedType feeds; + feeds.emplace(Output(pred.node()), Input::Initializer(false)); + feeds.emplace(Output(a.node()), Input::Initializer(1)); + std::vector out_tensors; + TF_ASSERT_OK(session.Run(feeds, {Output(if_node)}, &out_tensors)); + ASSERT_EQ(out_tensors.size(), 1); + EXPECT_EQ(out_tensors[0].scalar()(), 2); + } +} + +TEST(LowerIfWhileTest, RaisesWhenLoweringUnhandledOpType) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + Scope root = Scope::NewRootScope().ExitOnError(); + Node* const_node; + Tensor const_val(DT_INT32, TensorShape({})); + const_val.scalar()() = 1; + TF_ASSERT_OK(NodeBuilder("const", "Const") + .Attr("value", const_val) + .Attr("dtype", const_val.dtype()) + .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true) + .Finalize(root.graph(), &const_node)); + TF_ASSERT_OK(root.DoShapeInference(const_node)); + TF_ASSERT_OK(root.ToGraph(graph.get())); + + Status s = Rewrite(&graph); + ASSERT_EQ(s.code(), error::INTERNAL); + AssertHasSubstr(s.error_message(), "does not support lowering"); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/lower_while_op.cc b/tensorflow/core/common_runtime/lower_while_op.cc index 1f5da133e9..6f9921a796 100644 --- a/tensorflow/core/common_runtime/lower_while_op.cc +++ b/tensorflow/core/common_runtime/lower_while_op.cc @@ -53,8 +53,9 @@ using NodeOut = NodeBuilder::NodeOut; class LowerWhileHelper { public: static Status Run(Node* while_op, const string& cond_fn_name, - const string& body_fn_name, Graph* graph) { - LowerWhileHelper helper(while_op, cond_fn_name, body_fn_name, 
graph); + const string& body_fn_name, Graph* graph, + const FunctionLibraryDefinition& flib) { + LowerWhileHelper helper(while_op, cond_fn_name, body_fn_name, graph, flib); return helper.RunInternal(); } @@ -63,7 +64,8 @@ class LowerWhileHelper { // and body functions named `cond_fn_name` and `body_fn_name` respectively in // the given graph. LowerWhileHelper(Node* while_op, const string& cond_fn_name, - const string& body_fn_name, Graph* graph); + const string& body_fn_name, Graph* graph, + const FunctionLibraryDefinition& flib); Status RunInternal(); @@ -127,6 +129,7 @@ class LowerWhileHelper { // The IdentityN node with the same outputs as the original While op. Node* lowered_while_output_; Graph* graph_; + const FunctionLibraryDefinition& flib_; // Name of the `while_op_`. string name_; @@ -143,9 +146,11 @@ class LowerWhileHelper { }; LowerWhileHelper::LowerWhileHelper(Node* while_op, const string& cond_fn_name, - const string& body_fn_name, Graph* graph) + const string& body_fn_name, Graph* graph, + const FunctionLibraryDefinition& flib) : while_op_(while_op), graph_(graph), + flib_(flib), name_(while_op->name()), cond_call_builder_(NewName("cond"), cond_fn_name, graph->op_registry()), body_call_builder_(NewName("body"), body_fn_name, graph->op_registry()), @@ -346,8 +351,8 @@ string LowerWhileHelper::NewName(const string& infix) { return graph_->NewName(strings::StrCat(name_, "/", infix)); } -Status InlineCallInGraph(Node* n, Graph* g) { - const auto& lib = g->flib_def(); +Status InlineCallInGraph(Node* n, Graph* g, + const FunctionLibraryDefinition& lib) { const FunctionDef* fdef = lib.Find(n->type_string()); CHECK(fdef != nullptr); FunctionBody* fbody; @@ -365,46 +370,15 @@ Status InlineCallInGraph(Node* n, Graph* g) { } Status LowerWhileHelper::InlineCallNodes() { - TF_RETURN_IF_ERROR(InlineCallInGraph(cond_call_node_, graph_)); - TF_RETURN_IF_ERROR(InlineCallInGraph(body_call_node_, graph_)); + TF_RETURN_IF_ERROR(InlineCallInGraph(cond_call_node_, graph_, 
flib_)); + TF_RETURN_IF_ERROR(InlineCallInGraph(body_call_node_, graph_, flib_)); return Status::OK(); } } // namespace -Status LowerWhileOpPass::Run(const GraphOptimizationPassOptions& options) { - if (options.partition_graphs != nullptr) { - return errors::Internal( - "Lowering While op should happen before partitioning."); - } - if (options.graph == nullptr) { - return Status::OK(); - } - - Graph* g = options.graph->get(); - if (g == nullptr) { - return errors::Internal( - "Lowering While op requires a graph to be available."); - } - - // Match all the nodes that need to be rewritten. - gtl::InlinedVector matches; - for (Node* n : g->op_nodes()) { - if (n->type_string() == "While") { - // Only rewrite if the While op is marked as needing to be lowered. - bool match; - Status s = GetNodeAttr(n->attrs(), - LowerIfOpPass::kLowerUsingSwitchMergeAttr, &match); - if (s.ok() && match) matches.push_back(n); - } - } - for (Node* n : matches) { - TF_RETURN_IF_ERROR(RewriteNode(n, g)); - } - return Status::OK(); -} - -Status LowerWhileOpPass::RewriteNode(Node* n, Graph* g) { +Status RewriteWhileNode(Node* n, Graph* g, + const FunctionLibraryDefinition& flib) { const AttrValue* cond_attr = n->attrs().Find("cond"); if (cond_attr == nullptr) { return errors::InvalidArgument("While cond function missing"); @@ -415,13 +389,10 @@ Status LowerWhileOpPass::RewriteNode(Node* n, Graph* g) { } TF_RETURN_IF_ERROR(LowerWhileHelper::Run(n, cond_attr->func().name(), - body_attr->func().name(), g)); + body_attr->func().name(), g, flib)); g->RemoveNode(n); return Status::OK(); } -REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0, - LowerWhileOpPass); - } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/lower_while_op.h b/tensorflow/core/common_runtime/lower_while_op.h index eadafbeb91..4b640bafba 100644 --- a/tensorflow/core/common_runtime/lower_while_op.h +++ b/tensorflow/core/common_runtime/lower_while_op.h @@ -21,16 +21,10 @@ limitations under the 
License. namespace tensorflow { -// Rewrite While ops to use lower level control flow primitives instead. -class LowerWhileOpPass : public GraphOptimizationPass { - public: - Status Run(const GraphOptimizationPassOptions& options) override; - - private: - // Rewrite the given While node `n` in graph `g` to use the lower level - // primitives Enter, Exit, Switch, Merge and NextIteration. - Status RewriteNode(Node* n, Graph* g); -}; +// Replaces While node `n` with its lowered form that uses Enter, Exit, Switch, +// Merge, NextIteration and LoopCond nodes. +Status RewriteWhileNode(Node* n, Graph* g, + const FunctionLibraryDefinition& flib); } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/lower_while_op_test.cc b/tensorflow/core/common_runtime/lower_while_op_test.cc index 27cbada004..24fd4ed5bb 100644 --- a/tensorflow/core/common_runtime/lower_while_op_test.cc +++ b/tensorflow/core/common_runtime/lower_while_op_test.cc @@ -13,8 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/common_runtime/lower_while_op.h" -#include "tensorflow/core/common_runtime/lower_if_op.h" +#include "tensorflow/core/common_runtime/lower_if_while.h" #include "tensorflow/cc/client/client_session.h" #include "tensorflow/cc/framework/ops.h" @@ -37,13 +36,11 @@ namespace tensorflow { namespace { Status Rewrite(std::unique_ptr* graph) { - FunctionDefLibrary flib; - FunctionLibraryDefinition flib_def((*graph)->op_registry(), flib); - + FunctionLibraryDefinition flib_def((*graph)->flib_def()); GraphOptimizationPassOptions opt_options; opt_options.graph = graph; opt_options.flib_def = &flib_def; - LowerWhileOpPass pass; + LowerIfWhilePass pass; return pass.Run(opt_options); } @@ -54,7 +51,6 @@ TEST(LowerWhileOpTest, Simple) { FunctionDefLibrary f_lib_proto; *f_lib_proto.add_function() = test::function::XTimesTwo(); *f_lib_proto.add_function() = test::function::LessThanOrEqualToN(8); - FunctionLibraryDefinition f_lib(OpRegistry::Global(), f_lib_proto); Scope root = Scope::NewRootScope().ExitOnError(); TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto)); @@ -65,12 +61,12 @@ TEST(LowerWhileOpTest, Simple) { cond_func.mutable_func()->set_name("LessThanOrEqualToN"); AttrValue body_func; body_func.mutable_func()->set_name("XTimesTwo"); - TF_ASSERT_OK(NodeBuilder("while", "While", &f_lib) + TF_ASSERT_OK(NodeBuilder("while", "While", &root.graph()->flib_def()) .Input(inputs) .Attr("T", {DT_INT32}) .Attr("cond", cond_func) .Attr("body", body_func) - .Attr(LowerIfOpPass::kLowerUsingSwitchMergeAttr, true) + .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true) .Finalize(root.graph(), &while_node)); TF_ASSERT_OK(root.DoShapeInference(while_node)); TF_ASSERT_OK(root.ToGraph(graph.get())); @@ -154,7 +150,6 @@ TEST(LowerWhileOpTest, MultipleInputs) { FunctionDefLibrary f_lib_proto; *(f_lib_proto.add_function()) = test::function::XPlusOneXTimesY(); 
*(f_lib_proto.add_function()) = test::function::XYXLessThanOrEqualToN(4); - FunctionLibraryDefinition f_lib(OpRegistry::Global(), f_lib_proto); Scope root = Scope::NewRootScope().ExitOnError(); TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto)); @@ -167,12 +162,12 @@ TEST(LowerWhileOpTest, MultipleInputs) { cond_func.mutable_func()->set_name("XYXLessThanOrEqualToN"); AttrValue body_func; body_func.mutable_func()->set_name("XPlusOneXTimesY"); - TF_ASSERT_OK(NodeBuilder("while", "While", &f_lib) + TF_ASSERT_OK(NodeBuilder("while", "While", &root.graph()->flib_def()) .Input(inputs) .Attr("T", {DT_INT32, DT_INT32}) .Attr("cond", cond_func) .Attr("body", body_func) - .Attr(LowerIfOpPass::kLowerUsingSwitchMergeAttr, true) + .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true) .Finalize(root.graph(), &while_node)); TF_ASSERT_OK(root.DoShapeInference(while_node)); TF_ASSERT_OK(root.ToGraph(graph.get())); diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 46b8b10e90..f7bab691cd 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -1366,7 +1366,6 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.while_loop(lambda x: x < 10, body, [x0]) self.assertEqual(10, sess.run(r, {b: True})) - @test_util.disable_control_flow_v2("b/116134862 (cond output shape)") def testWhileCondWithControl(self): # Ensure that no control edges by an outer control dependency context are # added to nodes inside cond/while contexts. 
@@ -1478,7 +1477,6 @@ class ControlFlowTest(test.TestCase): self._testCondWhile_3(use_gpu=False) self._testCondWhile_3(use_gpu=True) - @test_util.disable_control_flow_v2("b/116134862 (cond output shape)") def testWhileCond_1(self): with self.cached_session(): @@ -1495,7 +1493,6 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.while_loop(c, b, [i]) self.assertAllEqual(10, r.eval()) - @test_util.disable_control_flow_v2("b/116134862 (cond output shape)") def testWhileCond_2(self): with self.cached_session(): @@ -1505,7 +1502,6 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.while_loop(c, b, [n]) self.assertAllEqual(10, r.eval()) - @test_util.disable_control_flow_v2("b/116134862 (cond output shape)") def testWhileCond_3(self): with self.cached_session(): @@ -2271,12 +2267,13 @@ class ControlFlowTest(test.TestCase): r = gradients_impl.gradients(r, v)[0] self.assertAllClose(1024.0, r.eval()) - @test_util.disable_control_flow_v2("b/116272044 (cond_in_while)") def testWhileCondGrad_Simple(self): self._testWhileCondGrad_Simple(use_gpu=False) - self._testWhileCondGrad_Simple(use_gpu=True) + if not control_flow_ops.ENABLE_WHILE_V2: + # TODO(b/117519152): Enable. + self._testWhileCondGrad_Simple(use_gpu=True) - @test_util.disable_control_flow_v2("b/116272044 (cond_in_while)") + @test_util.disable_control_flow_v2("b/117276490") def testWhileCondGrad_UnknownShape(self): with self.cached_session() as sess: v = array_ops.placeholder(dtypes.float32) -- GitLab From 6ae53a0915e5ce3bbd85e76e5c075f0ec60f23b9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 13:46:54 -0700 Subject: [PATCH 227/411] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 216586210 --- tensorflow/go/op/wrappers.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 28cd642739..c6ecd75587 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -19685,7 +19685,7 @@ func QuantizeV2RoundMode(value string) QuantizeV2Attr { // // ``` // out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -// if T == qint8: out[i] -= (range(T) + 1) / 2.0 +// if T == qint8, out[i] -= (range(T) + 1) / 2.0 // ``` // // here `range(T) = numeric_limits::max() - numeric_limits::min()` @@ -24277,7 +24277,7 @@ func DequantizeMode(value string) DequantizeAttr { // In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: // // ``` -// if T == qint8: in[i] += (range(T) + 1)/ 2.0 +// if T == qint8, in[i] += (range(T) + 1)/ 2.0 // out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) // ``` // here `range(T) = numeric_limits::max() - numeric_limits::min()` -- GitLab From 402e5862fa772282d673d5b1a95f4373db3be1b0 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 10 Oct 2018 13:52:11 -0700 Subject: [PATCH 228/411] Use the conversion options object throughout the process (including during the deferred runtime conversions), and remove its arg_types field. This is necessary for the correct behavior when converting dynamic functions - converted_call must operate in a manner consistent with the original parameters. 
PiperOrigin-RevId: 216587135 --- tensorflow/python/autograph/__init__.py | 6 +- .../python/autograph/converters/call_trees.py | 15 +-- .../python/autograph/converters/decorators.py | 3 +- .../autograph/converters/decorators_test.py | 7 +- tensorflow/python/autograph/core/converter.py | 111 ++++++++++++++---- .../autograph/core/converter_testing.py | 22 ++-- tensorflow/python/autograph/impl/api.py | 82 ++++--------- tensorflow/python/autograph/impl/api_test.py | 33 ++---- .../python/autograph/impl/conversion.py | 4 +- .../python/autograph/impl/conversion_test.py | 3 +- tensorflow/python/autograph/utils/testing.py | 2 + tensorflow/python/eager/function.py | 7 +- 12 files changed, 153 insertions(+), 142 deletions(-) diff --git a/tensorflow/python/autograph/__init__.py b/tensorflow/python/autograph/__init__.py index 5ed5e85158..e02fb36269 100644 --- a/tensorflow/python/autograph/__init__.py +++ b/tensorflow/python/autograph/__init__.py @@ -24,14 +24,14 @@ from __future__ import print_function # TODO(mdan): Bring only the relevant symbols to the top level. 
from tensorflow.python.autograph import operators from tensorflow.python.autograph import utils +from tensorflow.python.autograph.core.converter import ConversionOptions from tensorflow.python.autograph.core.errors import GraphConstructionError -from tensorflow.python.autograph.core.errors import TfRuntimeError from tensorflow.python.autograph.core.errors import improved_errors -from tensorflow.python.autograph.impl.api import ConversionOptions -from tensorflow.python.autograph.impl.api import RunMode +from tensorflow.python.autograph.core.errors import TfRuntimeError from tensorflow.python.autograph.impl.api import convert from tensorflow.python.autograph.impl.api import converted_call from tensorflow.python.autograph.impl.api import do_not_convert +from tensorflow.python.autograph.impl.api import RunMode from tensorflow.python.autograph.impl.api import to_code from tensorflow.python.autograph.impl.api import to_graph from tensorflow.python.autograph.lang.directives import set_element_type diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py index fc2075b781..a7926266d5 100644 --- a/tensorflow/python/autograph/converters/call_trees.py +++ b/tensorflow/python/autograph/converters/call_trees.py @@ -141,7 +141,7 @@ class CallTreeTransformer(converter.Base): if hasattr(target_entity, '__pyct_is_compile_decorator'): return False - if target_entity in self.ctx.program.autograph_decorators: + if target_entity in self.ctx.program.options.strip_decorators: return False # Inspect the target function decorators. 
If any include a @convert @@ -160,7 +160,7 @@ class CallTreeTransformer(converter.Base): for dec in target_node.decorator_list: decorator_fn = self._resolve_name(dec) if (decorator_fn is not None and - decorator_fn in self.ctx.program.autograph_decorators): + decorator_fn in self.ctx.program.options.strip_decorators): return False return True @@ -238,15 +238,12 @@ class CallTreeTransformer(converter.Base): # Before we could convert all the time though, we'd need a reasonable # caching mechanism. template = """ - ag__.converted_call( - func, - ag__.ConversionOptions.new(recursive=recursive_val), - args) + ag__.converted_call(func, options, args) """ call_expr = templates.replace( template, func=node.func, - recursive_val=parser.parse_expression(str(self.ctx.program.recursive)), + options=self.ctx.program.options.to_ast(self.ctx.info.namespace), args=node.args) new_call = call_expr[0].value # TODO(mdan): Improve the template mechanism to better support this. @@ -276,7 +273,7 @@ class CallTreeTransformer(converter.Base): # consider it graph ready. if anno.hasanno(node.func, 'live_val'): target_entity = anno.getanno(node.func, 'live_val') - if target_entity in self.ctx.program.autograph_decorators: + if target_entity in self.ctx.program.options.strip_decorators: if len(node.args) < 1: raise ValueError( 'Found call to decorator function "%s", but it had no arguments. ' @@ -318,7 +315,7 @@ class CallTreeTransformer(converter.Base): # ensure that they return the correct value. 
return node - if self.ctx.program.recursive: + if self.ctx.program.options.recursive: node = self._insert_dynamic_conversion(node) return node diff --git a/tensorflow/python/autograph/converters/decorators.py b/tensorflow/python/autograph/converters/decorators.py index 724f0fe5ed..d41893063b 100644 --- a/tensorflow/python/autograph/converters/decorators.py +++ b/tensorflow/python/autograph/converters/decorators.py @@ -56,8 +56,7 @@ class DecoratorsTransformer(converter.Base): original_dec = anno.getanno(dec_func, anno.Basic.QN) dec_value = anno.getanno(dec_func, 'live_val') - if dec_value in self.ctx.program.autograph_decorators: - # AutoGraph decorators do not need to be preserved. + if dec_value in self.ctx.program.options.strip_decorators: continue # When using foo.bar.baz, we only really need to grab foo and import diff --git a/tensorflow/python/autograph/converters/decorators_test.py b/tensorflow/python/autograph/converters/decorators_test.py index fb31c8d583..abd76849d6 100644 --- a/tensorflow/python/autograph/converters/decorators_test.py +++ b/tensorflow/python/autograph/converters/decorators_test.py @@ -57,17 +57,14 @@ def self_transform_decorator(transform): class DecoratorsTest(converter_testing.TestCase): - def _transform(self, f, autograph_decorators): + def _transform(self, f, strip_decorators): namespace = { 'self_transform_decorator': self_transform_decorator, 'simple_decorator': simple_decorator, 'converter_testing': converter_testing, } node, ctx = self.prepare( - f, - namespace, - recursive=False, - autograph_decorators=autograph_decorators) + f, namespace, recursive=False, strip_decorators=strip_decorators) node = decorators.transform(node, ctx) import_line = '\n'.join(ctx.program.additional_imports) result, _ = compiler.ast_to_object(node, source_prefix=import_line) diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py index 408a573ad0..1b07bed495 100644 --- 
a/tensorflow/python/autograph/core/converter.py +++ b/tensorflow/python/autograph/core/converter.py @@ -40,7 +40,7 @@ converter.ProgramContext contains mutable state across related entities. For example, when converting several functions that call one another, the ProgramContext should be shared across these entities. -Below is the overal flow at conversion: +Below is the overall flow at conversion: program_ctx = ProgramContext(, , ...) while : @@ -71,7 +71,10 @@ from tensorflow.python.autograph.pyct import anno from tensorflow.python.autograph.pyct import ast_util from tensorflow.python.autograph.pyct import cfg from tensorflow.python.autograph.pyct import compiler +from tensorflow.python.autograph.pyct import inspect_utils +from tensorflow.python.autograph.pyct import parser from tensorflow.python.autograph.pyct import qual_names +from tensorflow.python.autograph.pyct import templates from tensorflow.python.autograph.pyct import transformer from tensorflow.python.autograph.pyct.static_analysis import activity from tensorflow.python.autograph.pyct.static_analysis import live_values @@ -86,43 +89,107 @@ from tensorflow.python.autograph.pyct.static_analysis import type_info # TODO(mdan): Add a test specific to this converter. +class ConversionOptions(object): + """Immutable container for global conversion flags. + + Attributes: + recursive: bool, whether to recursively convert any user functions or + classes that the converted function may use. + verbose: bool, whether to log the converted code. + strip_decorators: Tuple[Callable], contains decorators that should be + excluded from the compiled output. By default, when converting a function + before the decorators are applied, the compiled output will include those + decorators. + force_conversion: bool, whether to force converting the target entity. When + force_conversion is turned off, the converter may decide to return the + function as-is. 
+ """ + + def __init__(self, + recursive=False, + verbose=False, + strip_decorators=None, + force_conversion=False): + self.recursive = recursive + self.verbose = verbose + self.strip_decorators = strip_decorators or () + self.force_conversion = force_conversion + + def to_ast(self, namespace): + """Returns a representation of this object as an AST node. + + The AST node encodes a constructor that would create an object with the + same contents. + + Args: + namespace: Dict[str, Any], the namespace to use when serializing values to + names. + + Returns: + ast.Node + """ + template = """ + constructor_name( + recursive=recursive_val, + verbose=verbose_val, + strip_decorators=strip_decorator_names, + force_conversion=force_conversion_val) + """ + + def as_qualified_name(o): + name = inspect_utils.getqualifiedname(namespace, o) + if not name: + raise ValueError('Could not locate entity {} in {}'.format( + o, namespace)) + return name + + strip_decorators_code = '({})'.format(', '.join( + tuple(as_qualified_name(o) for o in self.strip_decorators))) + + expr_ast = templates.replace( + template, + constructor_name=parser.parse_expression( + as_qualified_name(ConversionOptions)), + recursive_val=parser.parse_expression(str(self.recursive)), + verbose_val=parser.parse_expression(str(self.verbose)), + strip_decorator_names=parser.parse_expression(strip_decorators_code), + force_conversion_val=parser.parse_expression( + str(self.force_conversion))) + return expr_ast[0].value + + class ProgramContext(object): """ProgramContext keeps track of converting function hierarchies. This object is mutable, and is updated during conversion. Not thread safe. Attributes: - recursive: bool, whether to recursively convert any functions that the - decorator function may call. - autograph_decorators: Tuple[Callable, ...], decorator functions that belong - to AutoGraph. These require special treatment. 
+ options: ConversionOptions dependency_cache: Dict[Any, ast.AST], the original entities mapped to their - converted AST + converted AST additional_imports: Set[Any], additional entities which for any reason - cannot be attached after loading and need to be explicitly imported - in the generated code - name_map: Dict[str, str], map of original entity name to the name of - their converted counterparts - autograph_module: Module, a reference to the autograph module. This - needs to be specified by the caller to avoid circular dependencies. + cannot be attached after loading and need to be explicitly imported in the + generated code + name_map: Dict[str, str], map of original entity name to the name of their + converted counterparts + autograph_module: Module, a reference to the autograph module. This needs to + be specified by the caller to avoid circular dependencies. uncompiled_modules: Set[Tuple[str, ...]], with each tuple representing the - fully qualified name of a package containing functions that will not be - compiled. + fully qualified name of a package containing functions that will not be + compiled. required_imports: str, containing an import statement on each line. These - are all the imports necessary for the compiled code to run, in addition - to the closures of each entity, which are attached dynamically. + are all the imports necessary for the compiled code to run, in addition to + the closures of each entity, which are attached dynamically. 
""" def __init__( self, - recursive, - autograph_decorators, + options, partial_types, autograph_module, uncompiled_modules, ): - self.recursive = recursive - self.autograph_decorators = autograph_decorators + self.options = options self.partial_types = partial_types if partial_types else () self.autograph_module = autograph_module self.uncompiled_modules = uncompiled_modules @@ -140,7 +207,7 @@ class ProgramContext(object): tuple(self.additional_imports)) def new_namer(self, namespace): - return naming.Namer(namespace, self.recursive, self.name_map, + return naming.Namer(namespace, self.options.recursive, self.name_map, self.partial_types) def update_name_map(self, namer): @@ -294,7 +361,7 @@ def standard_analysis(node, context, is_initial=False): node: ast.AST context: converter.EntityContext is_initial: bool, whether this is the initial analysis done on the input - source code + source code Returns: ast.AST, same as node, with the static analysis annotations added diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py index fcdbd0a82c..c701053124 100644 --- a/tensorflow/python/autograph/core/converter_testing.py +++ b/tensorflow/python/autograph/core/converter_testing.py @@ -94,21 +94,13 @@ class TestCase(test.TestCase): self.dynamic_calls.append(args) return 7 - class ConversionOptions(object): - """Mock version of api.ConversionOptions.""" - - def __init__(self, recursive): - self.recursive = recursive - - @classmethod - def new(cls, recursive): - cls(recursive) - try: result, source = compiler.ast_to_object(node, include_source_map=True) + # TODO(mdan): Move this into self.prepare() result.tf = self.make_fake_mod('fake_tf', *symbols) - fake_ag = self.make_fake_mod('fake_ag', converted_call, ConversionOptions) + fake_ag = self.make_fake_mod('fake_ag', converted_call, + converter.ConversionOptions) fake_ag.__dict__.update(operators.__dict__) fake_ag.__dict__['utils'] = utils 
fake_ag.__dict__['rewrite_graph_construction_error'] = ( @@ -161,14 +153,16 @@ class TestCase(test.TestCase): arg_types=None, owner_type=None, recursive=True, - autograph_decorators=()): + strip_decorators=()): + namespace['ConversionOptions'] = converter.ConversionOptions + node, source = parser.parse_entity(test_fn) node = node.body[0] if namer is None: namer = FakeNamer() program_ctx = converter.ProgramContext( - recursive=recursive, - autograph_decorators=autograph_decorators, + options=converter.ConversionOptions( + recursive=recursive, strip_decorators=strip_decorators), partial_types=None, autograph_module=None, uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES) diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py index 1dc97d2331..3c31762cab 100644 --- a/tensorflow/python/autograph/impl/api.py +++ b/tensorflow/python/autograph/impl/api.py @@ -18,9 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections import functools - from enum import Enum from tensorflow.python.autograph.core import config @@ -39,41 +37,6 @@ from tensorflow.python.util import tf_inspect # (currently we require (module + class name, type)) -class ConversionOptions( - collections.namedtuple('ConversionOptions', - ('recursive', 'verbose', 'strip_decorators', - 'force_conversion', 'arg_types'))): - """Container for conversion flags. - - Attributes: - recursive: bool, whether to recursively convert any user functions or - classes that the converted function may use. - verbose: bool, whether to log the compiled code. - strip_decorators: Tuple[Callable], contains decorators that should be in - excluded from the compiled output. By default, when converting a - function before the decorators are applied, the compiled output will - include those decorators. - force_conversion: bool, whether to force convertinng the target entity. 
- When force_conversion is turned off, the converter may decide to - return the function as-is. - arg_types: Optional[Dict[Text, Type]], type hints for symbols including - function arguments. - """ - - @classmethod - def new(cls, - recursive=False, - verbose=False, - strip_decorators=None, - force_conversion=False, - arg_types=None): - return cls(recursive=recursive, - verbose=verbose, - strip_decorators=strip_decorators or (), - force_conversion=force_conversion, - arg_types=arg_types or {}) - - # TODO(mdan): This should behave like to_graph (e.g. convert statically). def convert(recursive=False, verbose=False): """Decorator that compiles a function to use TensorFlow ops. @@ -85,13 +48,14 @@ def convert(recursive=False, verbose=False): Args: recursive: bool, whether to recursively convert any functions or classes - that the converted function may use. + that the converted function may use. verbose: bool, whether to output the compiled code in the logs. Returns: Callable, a decorator that converts the given function into an equivalent function that uses TensorFlow ops. """ + def decorator(f): """Decorator implementation.""" @@ -99,7 +63,7 @@ def convert(recursive=False, verbose=False): def wrapper(*args, **kwargs): return converted_call( f, - ConversionOptions.new( + converter.ConversionOptions( recursive=recursive, verbose=verbose, force_conversion=True, @@ -137,10 +101,10 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None): Args: run_as: RunMode, specifies how to use the function in TensorFlow. - return_dtypes: Optional[Iterable[ - Union[tf.DType, utils.py_func.MatchDType]]], the return data types of - the converted function, if run_as is RunMode.PY_FUNC. Ignored otherwise. - May be set to None if the function has no return values. + return_dtypes: Optional[Iterable[ Union[tf.DType, + utils.py_func.MatchDType]]], the return data types of the converted + function, if run_as is RunMode.PY_FUNC. Ignored otherwise. 
May be set to + None if the function has no return values. Returns: Callable, a decorator that wraps the original function. @@ -219,13 +183,12 @@ def converted_call(f, options, *args, **kwargs): NotImplementedError('unknown callable type "%s"' % type(f)) arg_values = tf_inspect.getcallargs(arg_map_target, *args, **kwargs) + arg_types = {} for name, arg in arg_values.items(): if arg is unknown_arg_value: continue arg_class = arg.__class__ - # If arg_value_hints specifies any name, use that instead. - if name not in options.arg_types: - options.arg_types[name] = (arg_class.__name__, arg_class) + arg_types[name] = (arg_class.__name__, arg_class) # When called from within a decorator, this is the only indication that # the function is a method - it appears that the decorator is applied @@ -243,7 +206,7 @@ def converted_call(f, options, *args, **kwargs): recursive=options.recursive, verbose=options.verbose, arg_values=arg_values, - arg_types=options.arg_types, + arg_types=arg_types, partial_types=partial_types, strip_decorators=options.strip_decorators) return converted_f(*effective_args, **kwargs) @@ -270,15 +233,15 @@ def to_graph(e, Args: e: Union[Callable, Type], the Python entity to convert. recursive: bool, whether to recursively convert any functions that the - converted function may call. + converted function may call. verbose: bool, whether to output the compiled code in the logs. arg_values: Optional[Dict[Text, Any]], value hints for symbols including - function arguments. + function arguments. arg_types: Optional[Dict[Text, Type]], type hints for symbols including - function arguments. + function arguments. partial_types: Set[Type], reserved for internal use. strip_decorators: Tuple[Callable], same as - ConversionOptions.strip_decorators. + ConversionOptions.strip_decorators. 
Returns: Union[Callable, Type], the converted entity, which is the same kind as e @@ -293,8 +256,10 @@ def to_graph(e, strip_decorators += (convert, do_not_convert, converted_call) program_ctx = converter.ProgramContext( - recursive=recursive, - autograph_decorators=strip_decorators, + options=converter.ConversionOptions( + recursive=recursive, + verbose=verbose, + strip_decorators=strip_decorators), partial_types=partial_types, autograph_module=tf_inspect.getmodule(to_graph), uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES) @@ -352,11 +317,11 @@ def to_code(e, Args: e: Union[Callable, Type], the Python entity to convert. recursive: bool, whether to recursively convert any functions that the - converted function may call. + converted function may call. arg_values: Optional[Dict[Text, Any]], value hints for symbols including - function arguments. + function arguments. arg_types: Optional[Dict[Text, Type]], type hints for symbols including - function arguments. + function arguments. partial_types: Set[Type], reserved for internal use. indentation: Text, when to use for each level of indentation. @@ -364,8 +329,9 @@ def to_code(e, Text, the converted code. 
""" program_ctx = converter.ProgramContext( - recursive=recursive, - autograph_decorators=(convert, do_not_convert, converted_call), + options=converter.ConversionOptions( + recursive=recursive, + strip_decorators=(convert, do_not_convert, converted_call)), partial_types=partial_types, autograph_module=tf_inspect.getmodule(to_graph), uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES) diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py index 8ce5022c0a..8567c66bf1 100644 --- a/tensorflow/python/autograph/impl/api_test.py +++ b/tensorflow/python/autograph/impl/api_test.py @@ -21,7 +21,7 @@ from __future__ import print_function import numpy as np from tensorflow.python.autograph import utils -from tensorflow.python.autograph.core import config +from tensorflow.python.autograph.core import converter from tensorflow.python.autograph.impl import api from tensorflow.python.autograph.pyct import parser from tensorflow.python.autograph.utils import py_func @@ -29,15 +29,10 @@ from tensorflow.python.framework import constant_op from tensorflow.python.platform import test from tensorflow.python.util import tf_inspect - tf = utils.fake_tf() -class ApiTest(test.TestCase): - def setUp(self): - config.COMPILED_IMPORT_STATEMENTS = ( - 'from __future__ import print_function', - ) +class ApiTest(test.TestCase): def test_decorator_recurses(self): @@ -179,9 +174,8 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): while tf.reduce_sum(x) > s: - x //= api.converted_call( - self.called_member, - api.ConversionOptions.new(), self, a) + x //= api.converted_call(self.called_member, + converter.ConversionOptions(), self, a) return x tc = TestClass() @@ -192,7 +186,7 @@ class ApiTest(test.TestCase): self.assertListEqual([0, 1], sess.run(x).tolist()) def test_converted_call_builtin(self): - x = api.converted_call(range, api.ConversionOptions.new(), 3) + x = api.converted_call(range, 
converter.ConversionOptions(), 3) self.assertEqual((0, 1, 2), tuple(x)) def test_converted_call_function(self): @@ -203,7 +197,7 @@ class ApiTest(test.TestCase): return x with self.cached_session() as sess: - x = api.converted_call(test_fn, api.ConversionOptions.new(), + x = api.converted_call(test_fn, converter.ConversionOptions(), constant_op.constant(-1)) self.assertEqual(1, sess.run(x)) @@ -221,7 +215,7 @@ class ApiTest(test.TestCase): with self.cached_session() as sess: tc = TestClass(constant_op.constant(-1)) - x = api.converted_call(tc.test_method, api.ConversionOptions.new(), tc) + x = api.converted_call(tc.test_method, converter.ConversionOptions(), tc) self.assertEqual(1, sess.run(x)) def test_converted_call_method_by_class(self): @@ -238,9 +232,8 @@ class ApiTest(test.TestCase): with self.cached_session() as sess: tc = TestClass(constant_op.constant(-1)) - x = api.converted_call( - TestClass.test_method, - api.ConversionOptions.new(), tc) + x = api.converted_call(TestClass.test_method, + converter.ConversionOptions(), tc) self.assertEqual(1, sess.run(x)) def test_converted_call_callable_object(self): @@ -257,7 +250,7 @@ class ApiTest(test.TestCase): with self.cached_session() as sess: tc = TestClass(constant_op.constant(-1)) - x = api.converted_call(tc, api.ConversionOptions.new()) + x = api.converted_call(tc, converter.ConversionOptions()) self.assertEqual(1, sess.run(x)) def test_converted_call_constructor(self): @@ -273,7 +266,7 @@ class ApiTest(test.TestCase): return self.x with self.cached_session() as sess: - tc = api.converted_call(TestClass, api.ConversionOptions.new(), + tc = api.converted_call(TestClass, converter.ConversionOptions(), constant_op.constant(-1)) # tc is now a converted object. 
x = tc.test_method() @@ -285,12 +278,12 @@ class ApiTest(test.TestCase): return x == 0 with self.cached_session() as sess: - x = api.converted_call(f, api.ConversionOptions.new(), + x = api.converted_call(f, converter.ConversionOptions(), constant_op.constant(0)) self.assertTrue(sess.run(x)) converted_f = api.to_graph(f) - x = api.converted_call(converted_f, api.ConversionOptions.new(), + x = api.converted_call(converted_f, converter.ConversionOptions(), constant_op.constant(0)) self.assertTrue(sess.run(x)) diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py index 52abd40626..0374406ff2 100644 --- a/tensorflow/python/autograph/impl/conversion.py +++ b/tensorflow/python/autograph/impl/conversion.py @@ -145,7 +145,7 @@ def entity_to_graph(o, program_ctx, arg_values, arg_types): program_ctx.add_to_cache(o, node) - if program_ctx.recursive: + if program_ctx.options.recursive: while True: candidate = None for obj in program_ctx.name_map.keys(): @@ -256,7 +256,7 @@ def _add_self_references(namespace, autograph_module): # internal modules. 
ag_internal = imp.new_module('autograph') ag_internal.converted_call = autograph_module.converted_call - ag_internal.ConversionOptions = autograph_module.ConversionOptions + ag_internal.ConversionOptions = converter.ConversionOptions ag_internal.utils = utils ag_internal.function_scope = function_wrapping.function_scope ag_internal.rewrite_graph_construction_error = ( diff --git a/tensorflow/python/autograph/impl/conversion_test.py b/tensorflow/python/autograph/impl/conversion_test.py index 07d0f75129..34550d0112 100644 --- a/tensorflow/python/autograph/impl/conversion_test.py +++ b/tensorflow/python/autograph/impl/conversion_test.py @@ -34,8 +34,7 @@ class ConversionTest(test.TestCase): def _simple_program_ctx(self): return converter.ProgramContext( - recursive=True, - autograph_decorators=(), + options=converter.ConversionOptions(recursive=True), partial_types=(), autograph_module=api, uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES) diff --git a/tensorflow/python/autograph/utils/testing.py b/tensorflow/python/autograph/utils/testing.py index cb4785d0dc..dd6bdc8931 100644 --- a/tensorflow/python/autograph/utils/testing.py +++ b/tensorflow/python/autograph/utils/testing.py @@ -21,6 +21,7 @@ from __future__ import print_function import imp from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops @@ -28,6 +29,7 @@ def fake_tf(): """Creates a fake module that looks like TensorFlow, for testing.""" mod = imp.new_module('tensorflow') mod_contents = dict() + mod_contents.update(gen_math_ops.__dict__) mod_contents.update(math_ops.__dict__) mod_contents.update(ops.__dict__) mod_contents.update(mod.__dict__) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 191279abae..e399a4abb5 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -915,11 +915,8 @@ def func_graph_from_py_func(name, func_outputs = 
autograph.converted_call( python_func, autograph.ConversionOptions( - verbose=True, - recursive=True, - force_conversion=False, - strip_decorators=(defun,), - arg_types={}), *func_args, **func_kwargs) + verbose=True, recursive=True, strip_decorators=(defun,)), + *func_args, **func_kwargs) else: func_outputs = python_func(*func_args, **func_kwargs) # invariant: `func_outputs` contains only Tensors and `None`s. -- GitLab From 3fb4383c772ed8c5dc25bc903d6be694b70bf70a Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Wed, 10 Oct 2018 13:54:48 -0700 Subject: [PATCH 229/411] Don't force random ops (that generate data) to be on the CPU. It can be slower to generate the data (especially for larger pieces of data) on the host, and then copy to GPU. Before: entry { name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_64_channels_first" iters: 1000 wall_time: 0.00114285802841 extras { key: "examples_per_sec" value { double_value: 55999.9566079 } } } entry { name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_128_channels_first" iters: 1000 wall_time: 0.00138294219971 extras { key: "examples_per_sec" value { double_value: 92556.2905139 } } } entry { name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_256_channels_first" iters: 1000 wall_time: 0.00216886901855 extras { key: "examples_per_sec" value { double_value: 118033.868256 } } } After: entry { name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_64_channels_first" iters: 1000 wall_time: 0.000907440900803 extras { key: "examples_per_sec" value { double_value: 70528.0089793 } } } entry { name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_128_channels_first" iters: 1000 wall_time: 0.000939697980881 extras { key: "examples_per_sec" value { double_value: 136213.9779 } } } entry { name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_256_channels_first" iters: 1000 wall_time: 0.00147917103767 extras { key: "examples_per_sec" value { double_value: 173069.911105 } } } PiperOrigin-RevId: 216587562 --- 
.../core/common_runtime/eager/execute.cc | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index a52f933d75..0f46483ce5 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/env.h" @@ -580,6 +581,22 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, #endif } +// These ops are not pinnable since they generate data. It can be slower to +// generate and then copy the data instead of just generating the data on the +// device directly. 
+bool IsPinnableOp(const string& op_type) { + static const gtl::FlatSet<string>* unpinnable_ops = new gtl::FlatSet<string>({ + "RandomUniform", + "RandomUniformInt", + "RandomNormal", + "StatelessRandomUniform", + "StatelessRandomUniformInt", + "StatelessRandomNormal", + }); + + return unpinnable_ops->find(op_type) == unpinnable_ops->end(); +} + // The Op device may be updated if: // - A resource touching input is specified: all resource-touching ops run in // the device the resource is, regardless of anything else that has been @@ -591,7 +608,8 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, Status MaybeUpdateOpDevice(EagerOperation* op) { EagerContext* ctx = op->EagerContext(); bool device_set_for_resource_variable = false; - bool all_inputs_eligible_for_cpu_pinning = ctx->PinSmallOpsToCPU(); + bool all_inputs_eligible_for_cpu_pinning = + ctx->PinSmallOpsToCPU() && IsPinnableOp(op->Name()); for (int i = 0; i < op->Inputs().size(); ++i) { Device* input_op_device = nullptr; -- GitLab From ef64a86fe85972acda3a5195b78486c143b51b97 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 10 Oct 2018 14:09:00 -0700 Subject: [PATCH 230/411] Update to new toolchain with Clang 8.0.0 r340178. - Also update bazel to 0.16.1 because the new toolchain image requires it. 
PiperOrigin-RevId: 216590129 --- tensorflow/workspace.bzl | 8 ++++---- third_party/toolchains/BUILD | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b03af53cff..7238a74b73 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -694,11 +694,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "bazel_toolchains", - sha256 = "3b604699685c5c65dd3f6f17425570a4b2f00ddba2f750db15acc72e55bb098b", - strip_prefix = "bazel-toolchains-37acf1841ab1475c98a152cb9e446460c8ae29e1", + sha256 = "07dfbe80638eb1fe681f7c07e61b34b579c6710c691e49ee90ccdc6e9e75ebbb", + strip_prefix = "bazel-toolchains-9a111bd82161c1fbe8ed17a593ca1023fd941c70", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz", - "https://github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz", + "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/9a111bd82161c1fbe8ed17a593ca1023fd941c70.tar.gz", + "https://github.com/bazelbuild/bazel-toolchains/archive/9a111bd82161c1fbe8ed17a593ca1023fd941c70.tar.gz", ], ) diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD index bcbc4dda11..6e1416ced1 100644 --- a/third_party/toolchains/BUILD +++ b/third_party/toolchains/BUILD @@ -17,7 +17,7 @@ platform( remote_execution_properties = """ properties: { name: "container-image" - value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:495a025ed5e273cfa5d53357ef93ac20500c008994e0be106c509f51555fb93c" + value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:63a0e981a4e7ce5da2a851cf063e430f72947fd999d9336b7e54e2eebe8e0bf5" }""", ) -- GitLab From 8e468c33d22b8da61b02254d648759860f9426bb Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 10 Oct 2018 14:21:01 -0700 Subject: [PATCH 231/411] [TF CTC] Add ctc_beam_search_decoder_v2, 
which disables merge_repeated. Followup of PRs #15586 and #21826. PiperOrigin-RevId: 216592105 --- tensorflow/python/ops/ctc_ops.py | 40 ++++++++++++++++++- .../tools/api/golden/v1/tensorflow.nn.pbtxt | 4 ++ .../tools/api/golden/v2/tensorflow.nn.pbtxt | 2 +- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 32d455bdad..4b0f528dfb 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -231,7 +231,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): log_probabilities) -@tf_export("nn.ctc_beam_search_decoder") +@tf_export(v1=["nn.ctc_beam_search_decoder"]) def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, top_paths=1, merge_repeated=True): """Performs beam search decoding on the logits given in input. @@ -282,6 +282,44 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, log_probabilities) +@tf_export("nn.ctc_beam_search_decoder", v1=["nn.ctc_beam_search_decoder_v2"]) +def ctc_beam_search_decoder_v2(inputs, sequence_length, beam_width=100, + top_paths=1): + """Performs beam search decoding on the logits given in input. + + **Note** The `ctc_greedy_decoder` is a special case of the + `ctc_beam_search_decoder` with `top_paths=1` and `beam_width=1` (but + that decoder is faster for this special case). + + Args: + inputs: 3-D `float` `Tensor`, size + `[max_time, batch_size, num_classes]`. The logits. + sequence_length: 1-D `int32` vector containing sequence lengths, + having size `[batch_size]`. + beam_width: An int scalar >= 0 (beam search beam width). + top_paths: An int scalar >= 0, <= beam_width (controls output size). 
+ + Returns: + A tuple `(decoded, log_probabilities)` where + decoded: A list of length top_paths, where `decoded[j]` + is a `SparseTensor` containing the decoded outputs: + `decoded[j].indices`: Indices matrix `[total_decoded_outputs[j], 2]`; + The rows store: `[batch, time]`. + `decoded[j].values`: Values vector, size `[total_decoded_outputs[j]]`. + The vector stores the decoded classes for beam `j`. + `decoded[j].dense_shape`: Shape vector, size `(2)`. + The shape values are: `[batch_size, max_decoded_length[j]]`. + log_probability: A `float` matrix `[batch_size, top_paths]` containing + sequence log-probabilities. + """ + + # Note, merge_repeated is an invalid optimization that is removed from the + # public API: it returns low probability paths. + return ctc_beam_search_decoder(inputs, sequence_length=sequence_length, + beam_width=beam_width, top_paths=top_paths, + merge_repeated=False) + + ops.NotDifferentiable("CTCGreedyDecoder") diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt index 9b28ce5746..b7f5d88fd4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt @@ -92,6 +92,10 @@ tf_module { name: "ctc_beam_search_decoder" argspec: "args=[\'inputs\', \'sequence_length\', \'beam_width\', \'top_paths\', \'merge_repeated\'], varargs=None, keywords=None, defaults=[\'100\', \'1\', \'True\'], " } + member_method { + name: "ctc_beam_search_decoder_v2" + argspec: "args=[\'inputs\', \'sequence_length\', \'beam_width\', \'top_paths\'], varargs=None, keywords=None, defaults=[\'100\', \'1\'], " + } member_method { name: "ctc_greedy_decoder" argspec: "args=[\'inputs\', \'sequence_length\', \'merge_repeated\'], varargs=None, keywords=None, defaults=[\'True\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt index 9b28ce5746..f6c5e42034 100644 --- 
a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt @@ -90,7 +90,7 @@ tf_module { } member_method { name: "ctc_beam_search_decoder" - argspec: "args=[\'inputs\', \'sequence_length\', \'beam_width\', \'top_paths\', \'merge_repeated\'], varargs=None, keywords=None, defaults=[\'100\', \'1\', \'True\'], " + argspec: "args=[\'inputs\', \'sequence_length\', \'beam_width\', \'top_paths\'], varargs=None, keywords=None, defaults=[\'100\', \'1\'], " } member_method { name: "ctc_greedy_decoder" -- GitLab From 5275bd0ae0306ac5881c5325656a9e6e4a963df8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 14:53:37 -0700 Subject: [PATCH 232/411] create a reusable function for initializing tensorflow from tf lite. PiperOrigin-RevId: 216597926 --- tensorflow/contrib/lite/testing/BUILD | 21 +++++++++++-- .../contrib/lite/testing/init_tensorflow.cc | 31 +++++++++++++++++++ .../contrib/lite/testing/init_tensorflow.h | 26 ++++++++++++++++ .../lite/testing/tflite_diff_example_test.cc | 6 ++-- 4 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 tensorflow/contrib/lite/testing/init_tensorflow.cc create mode 100644 tensorflow/contrib/lite/testing/init_tensorflow.h diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index b476445b3a..f7f812343b 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -296,6 +296,23 @@ cc_test( ], ) +cc_library( + name = "init_tensorflow", + srcs = [ + "init_tensorflow.cc", + ], + hdrs = [ + "init_tensorflow.h", + ], + visibility = [ + "//tensorflow/contrib/lite/java/src/main/native:__subpackages__", + "//tensorflow/contrib/lite/testing:__subpackages__", + ], + deps = [ + "//tensorflow/core:lib", + ], +) + cc_library( name = "tflite_diff_util", srcs = ["tflite_diff_util.cc"], @@ -348,9 +365,9 @@ tf_cc_test( "tflite_not_portable", ], deps = [ + ":init_tensorflow", 
":tflite_diff_flags", ":tflite_diff_util", - "//tensorflow/core:lib", ], ) @@ -358,9 +375,9 @@ cc_binary( name = "tflite_diff", srcs = ["tflite_diff_example_test.cc"], deps = [ + ":init_tensorflow", ":tflite_diff_flags", ":tflite_diff_util", - "//tensorflow/core:lib", ], ) diff --git a/tensorflow/contrib/lite/testing/init_tensorflow.cc b/tensorflow/contrib/lite/testing/init_tensorflow.cc new file mode 100644 index 0000000000..f3dcf620a2 --- /dev/null +++ b/tensorflow/contrib/lite/testing/init_tensorflow.cc @@ -0,0 +1,31 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/init_tensorflow.h" + +#include +#include + +#include "tensorflow/core/platform/init_main.h" + +namespace tflite { +void InitTensorFlow() { + static const char* kFakeName = "fake program name"; + int argc = 1; + char* fake_name_copy = strdup(kFakeName); + char** argv = &fake_name_copy; + ::tensorflow::port::InitMain(kFakeName, &argc, &argv); + free(fake_name_copy); +} +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/init_tensorflow.h b/tensorflow/contrib/lite/testing/init_tensorflow.h new file mode 100644 index 0000000000..2cc89bbbca --- /dev/null +++ b/tensorflow/contrib/lite/testing/init_tensorflow.h @@ -0,0 +1,26 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_ +#define TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_ + +namespace tflite { + +// Initializes tensorflow's libraries. Note that this simulates an empty +// command line, so flags are not initialized. +void InitTensorFlow(); + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_ diff --git a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc index e85d9c525a..49696ac76b 100644 --- a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc +++ b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc @@ -13,17 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/contrib/lite/testing/init_tensorflow.h" #include "tensorflow/contrib/lite/testing/tflite_diff_flags.h" #include "tensorflow/contrib/lite/testing/tflite_diff_util.h" -#include "tensorflow/core/platform/init_main.h" int main(int argc, char** argv) { + ::tflite::InitTensorFlow(); // For Flex support. 
+ ::tflite::testing::DiffOptions options = ::tflite::testing::ParseTfliteDiffFlags(&argc, argv); if (options.tensorflow_model.empty()) return 1; - ::tensorflow::port::InitMain("usage", &argc, &argv); - int failure_count = 0; for (int i = 0; i < options.num_runs_per_pass; i++) { if (!tflite::testing::RunDiffTest(options, /*num_invocations=*/1)) { -- GitLab From a584dc9020e41031c13f65625c55b715cc5ca5dd Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 10 Oct 2018 14:55:22 -0700 Subject: [PATCH 233/411] Automated rollback of commit 6aebb0866718cae2c921e875f3fd74573ee9acc8 PiperOrigin-RevId: 216598193 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 8d15c857f8..367606ef27 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1794,19 +1794,18 @@ class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook): summary_writer=summary_writer) def _log_and_record(self, elapsed_steps, elapsed_time, global_step): - global_steps_per_sec = elapsed_steps / elapsed_time - examples_per_sec = self._batch_size * global_steps_per_sec + global_step_per_sec = elapsed_steps / elapsed_time + examples_per_sec = self._batch_size * global_step_per_sec if self._summary_writer is not None: global_step_summary = Summary(value=[ - Summary.Value(tag='global_steps/sec', - simple_value=global_steps_per_sec) + Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec) ]) example_summary = Summary(value=[ Summary.Value(tag='examples/sec', simple_value=examples_per_sec) ]) self._summary_writer.add_summary(global_step_summary, global_step) self._summary_writer.add_summary(example_summary, global_step) - logging.info('global_steps/sec: %g', global_steps_per_sec) + logging.info('global_step/sec: %g', global_step_per_sec) 
logging.info('examples/sec: %g', examples_per_sec) -- GitLab From 4fe9033756aca1f31e087c7fee0849120d6629bc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 15:04:08 -0700 Subject: [PATCH 234/411] Bump open source abseil-cpp revision to f340f773edab951656b19b6f1a77c964a78ec4c2 PiperOrigin-RevId: 216599799 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 7238a74b73..6229e01bbe 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -117,11 +117,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_google_absl", build_file = clean_dep("//third_party:com_google_absl.BUILD"), - sha256 = "cd1650daecfdd5591502bb017c70777c959cf604a962352bd5312bef8d78a8c6", - strip_prefix = "abseil-cpp-445998d7ac4e5d3c50411d377e3b50e960d2d6c2", + sha256 = "2809f7d97d126ad341c3126711df7bae6336278d959144db293a9b2756b726a8", + strip_prefix = "abseil-cpp-f340f773edab951656b19b6f1a77c964a78ec4c2", urls = [ - "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz", - "https://github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz", + "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f340f773edab951656b19b6f1a77c964a78ec4c2.tar.gz", + "https://github.com/abseil/abseil-cpp/archive/f340f773edab951656b19b6f1a77c964a78ec4c2.tar.gz", ], ) -- GitLab From 6905ffd1cf2bea18af3486ef831dd174c3d95b7a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 15:05:57 -0700 Subject: [PATCH 235/411] [XLA] Update TODO references related to all-reduce. 
PiperOrigin-RevId: 216600146 --- tensorflow/compiler/xla/client/xla_builder.h | 4 ++-- tensorflow/compiler/xla/service/hlo_instruction.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 933c0e7b44..2916ac1b2a 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -699,7 +699,7 @@ class XlaBuilder { // the same channel_id, they will be 'Allreduce'd. If empty, Allreduce will // not be applied cross modules. // - // TODO(b/79737069): Rename this to AllReduce when it's ready to use. + // TODO(b/117564385): Rename this to AllReduce when it's ready to use. XlaOp CrossReplicaSum( const XlaOp& operand, const XlaComputation& computation, absl::Span replica_groups = {}, @@ -1864,7 +1864,7 @@ XlaOp CrossReplicaSum(const XlaOp& operand, // same channel_id, they will be 'Allreduce'd. If empty, Allreduce will not be // applied cross modules. // -// TODO(b/79737069): Rename this to AllReduce when it's ready to use. +// TODO(b/117564385): Rename this to AllReduce when it's ready to use. XlaOp CrossReplicaSum( const XlaOp& operand, const XlaComputation& computation, absl::Span replica_groups = {}, diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 81fe1d0a9a..15a4da8dbe 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -463,7 +463,7 @@ class HloInstruction { // the same all_reduce_id, they will be 'Allreduce'd. If empty, Allreduce will // not be applied cross modules. // - // TODO(b/79737069): Rename this to AllReduce. + // TODO(b/117564385): Rename this to AllReduce. 
static std::unique_ptr CreateCrossReplicaSum( const Shape& shape, absl::Span operands, HloComputation* reduce_computation, -- GitLab From ea38b380b945c5a0462c9125787439deda7f6327 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 10 Oct 2018 15:18:22 -0700 Subject: [PATCH 236/411] First draft of tf.compat.v1.wrap_function. PiperOrigin-RevId: 216602214 --- tensorflow/python/eager/BUILD | 26 ++++++ tensorflow/python/eager/function.py | 53 +++++++---- tensorflow/python/eager/wrap_function.py | 93 +++++++++++++++++++ tensorflow/python/eager/wrap_function_test.py | 59 ++++++++++++ tensorflow/python/framework/ops.py | 6 +- 5 files changed, 215 insertions(+), 22 deletions(-) create mode 100644 tensorflow/python/eager/wrap_function.py create mode 100644 tensorflow/python/eager/wrap_function_test.py diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index cae809a7c3..72cf97dca3 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -56,6 +56,7 @@ py_library( ":graph_only_ops", ":tape", ":test", + ":wrap_function", "//tensorflow/python:pywrap_tensorflow", ], ) @@ -413,3 +414,28 @@ py_test( "//tensorflow/python:framework_ops", ], ) + +py_library( + name = "wrap_function", + srcs = ["wrap_function.py"], + srcs_version = "PY2AND3", + deps = [ + ":context", + ":function", + "//tensorflow/python:framework_ops", + "//tensorflow/python:template", + "//tensorflow/python:variable_scope", + "//tensorflow/python/training/checkpointable:base", + ], +) + +py_test( + name = "wrap_function_test", + srcs = ["wrap_function_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":wrap_function", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + ], +) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index e399a4abb5..6ef07b71a9 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -537,7 +537,7 @@ class Function(object): is 
differentiable under `tf.GradientTape` objects. """ - def __init__(self, func_graph, attrs=None): + def __init__(self, func_graph, attrs=None, signature=None): """Initialize a Function. Args: @@ -563,6 +563,7 @@ class Function(object): _inference_name(self._func_graph.name), self._func_graph, self._func_graph.inputs, self._func_graph.outputs, self._attrs) self._backward_graph_function = None + self._signature = signature def __call__(self, *args): """Executes the wrapped function. @@ -600,6 +601,10 @@ class Function(object): tensor_inputs.append(arg.handle) elif isinstance(arg, ops.Tensor): tensor_inputs.append(arg) + elif (self._signature is not None and + isinstance(self._signature[i], tensor_spec.TensorSpec)): + tensor_inputs.append( + ops.convert_to_tensor(arg, self._signature[i].dtype)) else: raise ValueError("All inputs to `Function`s must be Tensors; " "on invocation of %s, the %d-th input (%s) was not a " @@ -841,13 +846,30 @@ def _get_defun_inputs_from_args(args): return nest.pack_sequence_as(args, function_inputs) +def check_mutation(n1, n2): + """Check if two list of arguments are exactly the same.""" + errmsg = ("Function to be traced should not modify structure of input " + "arguments. Check if your function has list and dictionary " + "operations that alter input arguments, " + "such as `list.pop`, `list.append`") + try: + nest.assert_same_structure(n1, n2) + except ValueError: + raise ValueError(errmsg) + + for arg1, arg2 in zip(nest.flatten(n1), nest.flatten(n2)): + if arg1 is not arg2: + raise ValueError(errmsg) + + def func_graph_from_py_func(name, python_func, args, kwargs, signature=None, func_graph=None, - experimental_autograph=False): + experimental_autograph=False, + add_control_dependencies=True): """Returns a `FuncGraph` generated from `python_func`. Args: @@ -866,6 +888,9 @@ def func_graph_from_py_func(name, this graph else a new one is built and returned. experimental_autograph: whether to use autograph to compile `python_func`. 
See https://www.tensorflow.org/guide/autograph for more information. + add_control_dependencies: If True, automatically adds control dependencies + to ensure program order matches execution order and stateful ops always + execute. Returns: A FuncGraph. @@ -877,7 +902,11 @@ def func_graph_from_py_func(name, if func_graph is None: func_graph = FuncGraph(name) assert isinstance(func_graph, FuncGraph) - with func_graph.as_default(), AutomaticControlDependencies() as a: + if add_control_dependencies: + control_manager = AutomaticControlDependencies + else: + control_manager = ops.NullContextmanager + with func_graph.as_default(), control_manager() as a: variable_scope.get_variable_scope().set_use_resource(True) if signature is not None: @@ -906,7 +935,8 @@ def func_graph_from_py_func(name, "must return zero or more Tensors; in compilation of %s, found " "return value of type %s, which is not a Tensor." % (str(python_func), type(x))) - x = a.mark_as_return(x) + if add_control_dependencies: + x = a.mark_as_return(x) return x this_tape = tape.push_new_tape() @@ -922,21 +952,6 @@ def func_graph_from_py_func(name, # invariant: `func_outputs` contains only Tensors and `None`s. func_outputs = nest.map_structure(convert, func_outputs) - def check_mutation(n1, n2): - """Check if two list of arguments are exactly the same.""" - errmsg = ("Function to be traced should not modify structure of input " - "arguments. 
Check if your function has list and dictionary " - "operations that alter input arguments, " - "such as `list.pop`, `list.append`") - try: - nest.assert_same_structure(n1, n2) - except ValueError: - raise ValueError(errmsg) - - for arg1, arg2 in zip(nest.flatten(n1), nest.flatten(n2)): - if arg1 is not arg2: - raise ValueError(errmsg) - check_mutation(func_args_before, func_args) check_mutation(func_kwargs_before, func_kwargs) finally: diff --git a/tensorflow/python/eager/wrap_function.py b/tensorflow/python/eager/wrap_function.py new file mode 100644 index 0000000000..2a9a164709 --- /dev/null +++ b/tensorflow/python/eager/wrap_function.py @@ -0,0 +1,93 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# pylint: disable=unidiomatic-typecheck +"""Prototype decorator for defining legacy-graph-mode functions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import function +from tensorflow.python.ops import variable_scope + + +class VariableHolder(object): + """Holds variables for a python function.""" + + def __init__(self, fn): + self._fn = fn + self._variables = [] + + def variable_creator_scope(self, next_creator, **kwargs): + v = next_creator(**kwargs) + self._variables.append(v) + return v + + def __call__(self, *args, **kwargs): + with variable_scope.variable_creator_scope(self.variable_creator_scope): + return self._fn(*args, **kwargs) + + +def wrap_function(fn, signature, name=None): + """Wraps the TF 1.x function fn into a graph function. + + The python function `fn` will be called once with symbolic arguments specified + in the `signature`, traced, and turned into a graph function. Any variables + created by `fn` will be owned by the object returned by `wrap_function`. The + resulting graph function can be called with tensors which match the + signature. + + ```python + def f(x, do_add): + v = tf.Variable(5.0) + if do_add: + op = v.assign_add(x) + else: + op = v.assign_sub(x) + with tf.control_dependencies([op]): + return v.read_value() + + f_add = tf.compat.v1.wrap_function(f, [tf.TensorSpec((), tf.float32), True]) + + assert float(f_add(1.0)) == 6.0 + assert float(f_add(1.0)) == 7.0 + + # Can call tf.compat.v1.wrap_function again to get a new trace, a new set + # of variables, and possibly different non-template arguments. 
+ f_sub= tf.compat.v1.wrap_function(f, [tf.TensorSpec((), tf.float32), False]) + + assert float(f_sub(1.0)) == 4.0 + assert float(f_sub(1.0)) == 3.0 + ``` + + Args: + fn: python function to be wrapped + signature: the placeholder and python arguments to be passed to the + wrapped function + name: Optional. The name of the function. + + Returns: + the wrapped graph function. + """ + holder = VariableHolder(fn) + fn = function.Function( + function.func_graph_from_py_func( + name, + holder, + args=None, kwargs=None, signature=signature, + add_control_dependencies=False), + signature=signature) + fn._variable_holder = holder + return fn diff --git a/tensorflow/python/eager/wrap_function_test.py b/tensorflow/python/eager/wrap_function_test.py new file mode 100644 index 0000000000..0690358491 --- /dev/null +++ b/tensorflow/python/eager/wrap_function_test.py @@ -0,0 +1,59 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +from tensorflow.python.eager import wrap_function +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_spec +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class WrapFunctionTest(test.TestCase): + + def testDocString(self): + + def f(x, do_add): + v = variables.Variable(5.0) + if do_add: + op = v.assign_add(x) + else: + op = v.assign_sub(x) + with ops.control_dependencies([op]): + return v.read_value() + + f_add = wrap_function.wrap_function( + f, [tensor_spec.TensorSpec((), dtypes.float32), True]) + + self.assertAllEqual(f_add(1.0), 6.0) + self.assertAllEqual(f_add(1.0), 7.0) + + # Can call tf.compat.v1.wrap_function again to get a new trace, a new set + # of variables, and possibly different non-template arguments. 
+ f_sub = wrap_function.wrap_function( + f, [tensor_spec.TensorSpec((), dtypes.float32), False]) + + self.assertAllEqual(f_sub(1.0), 4.0) + self.assertAllEqual(f_sub(1.0), 3.0) + + +if __name__ == '__main__': + ops.enable_eager_execution() + test.main() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 140bd098a6..50ab118fd6 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -101,7 +101,7 @@ class _UserDeviceSpec(object): self.function = pydev.merge_device(self._device_name_or_function) -class _NullContextmanager(object): +class NullContextmanager(object): def __enter__(self): pass @@ -4951,7 +4951,7 @@ def _colocate_with_for_gradient(op, gradient_uid, ignore_existing=False): if op is not None: return device(op.device) else: - return _NullContextmanager() + return NullContextmanager() else: default_graph = get_default_graph() if isinstance(op, EagerTensor): @@ -4996,7 +4996,7 @@ def control_dependencies(control_inputs): for control in control_inputs: if callable(control): control() - return _NullContextmanager() + return NullContextmanager() else: return get_default_graph().control_dependencies(control_inputs) -- GitLab From 20c47de2eabec47391e19f8fe4da5d83f0f8ab85 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Wed, 10 Oct 2018 15:19:12 -0700 Subject: [PATCH 237/411] Internal change PiperOrigin-RevId: 216602362 --- tensorflow/contrib/lite/delegates/flex/BUILD | 38 +++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD index 9b89ed4f84..2f866eaecb 100644 --- a/tensorflow/contrib/lite/delegates/flex/BUILD +++ b/tensorflow/contrib/lite/delegates/flex/BUILD @@ -42,8 +42,31 @@ tf_cc_test( ], ) +# Delegate implementation that pulls in the standard set of TensorFlow ops and +# kernels. 
cc_library( name = "delegate", + hdrs = [ + "delegate.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":delegate_only_runtime", + ] + select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + ], + "//conditions:default": [ + "//tensorflow/core:tensorflow", + ], + }), + alwayslink = 1, +) + +# Delegate implementation that does *not* pull in the standard set of TensorFlow +# ops and kernels. +cc_library( + name = "delegate_only_runtime", srcs = [ "delegate.cc", ], @@ -134,12 +157,12 @@ cc_library( # set of core TensorFlow kernels. We may want to revisit this dependency # to allow selective registration via build targets. "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:framework", - "//tensorflow/core:tensorflow", ], }), ) @@ -153,7 +176,14 @@ tf_cc_test( ":kernel", ":test_util", "@com_google_googletest//:gtest", - ], + ] + select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + ], + "//conditions:default": [ + "//tensorflow/core:tensorflow", + ], + }), ) cc_library( @@ -180,7 +210,7 @@ cc_library( "//tensorflow/contrib/lite:kernel_api", ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core:lib", -- GitLab From 0c284043cbdaed1f88c43be3a222d3bc1c235e89 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 10 Oct 2018 15:35:14 -0700 Subject: [PATCH 238/411] [TF:XLA] Don't aggregate writes to TensorArrays that aren't gradient TensorArrays. In the non-XLA case it is an error to write the same entry multiple times. This saves reading the existing value from memory in cases where it should not be meaningful. 
PiperOrigin-RevId: 216604960 --- .../compiler/tf2xla/kernels/tensor_array_ops.cc | 13 ++++++++++--- tensorflow/compiler/tf2xla/xla_context.cc | 3 ++- tensorflow/compiler/tf2xla/xla_resource.cc | 13 +++++++++---- tensorflow/compiler/tf2xla/xla_resource.h | 10 +++++++++- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc index 6cdfaf4d97..06a560d947 100644 --- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc @@ -223,9 +223,16 @@ class TensorArrayWriteOp : public XlaOpKernel { slice_shape.InsertDim(0, 1LL); auto update = xla::Reshape(value, slice_shape.dim_sizes()); - xla::XlaOp written = DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), - start_indices, dtype_); - + xla::XlaOp written; + if (resource->tensor_array_multiple_writes_aggregate()) { + written = DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), + start_indices, dtype_); + } else { + // TODO(b/117569591): Ideally we would report an error in the case that we + // see multiple writes to the same offset. Unfortunately there is no way + // to report errors at the moment, so we silently overwrite. 
+ written = xla::DynamicUpdateSlice(ta, update, start_indices); + } OP_REQUIRES_OK(ctx, resource->SetValue(written)); ctx->SetOutput(0, flow); } diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc index f247570d72..2095a6b809 100644 --- a/tensorflow/compiler/tf2xla/xla_context.cc +++ b/tensorflow/compiler/tf2xla/xla_context.cc @@ -138,7 +138,8 @@ Status XlaContext::CreateResource( const std::set& tensor_array_gradients, XlaResource** resource) { resources_.emplace_back( new XlaResource(kind, arg_num, std::move(name), type, std::move(shape), - handle, tensor_array_size, tensor_array_gradients)); + handle, tensor_array_size, tensor_array_gradients, + /*tensor_array_multiple_writes_aggregate=*/false)); *resource = resources_.back().get(); return Status::OK(); } diff --git a/tensorflow/compiler/tf2xla/xla_resource.cc b/tensorflow/compiler/tf2xla/xla_resource.cc index 56c2e01055..63b09c8f02 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.cc +++ b/tensorflow/compiler/tf2xla/xla_resource.cc @@ -29,7 +29,8 @@ namespace tensorflow { XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, TensorShape shape, const xla::XlaOp& initial_value, int64 tensor_array_size, - const std::set& tensor_array_gradients) + const std::set& tensor_array_gradients, + bool tensor_array_multiple_writes_aggregate) : kind_(kind), arg_num_(arg_num), name_(std::move(name)), @@ -37,14 +38,17 @@ XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, shape_(std::move(shape)), value_(initial_value), initial_value_(initial_value), - tensor_array_size_(tensor_array_size) { + tensor_array_size_(tensor_array_size), + tensor_array_multiple_writes_aggregate_( + tensor_array_multiple_writes_aggregate) { CHECK(kind_ != kInvalid); for (const string& gradient : tensor_array_gradients) { tensor_array_gradients_[gradient].reset(new XlaResource( /*kind=*/kTensorArray, /*arg_num=*/-1, /*name=*/absl::StrCat("TensorArrayGrad: 
", name_), type_, shape_, - xla::XlaOp(), tensor_array_size_, /*tensor_array_gradients=*/{})); + xla::XlaOp(), tensor_array_size_, /*tensor_array_gradients=*/{}, + /*tensor_array_multiple_writes_aggregate=*/true)); } } @@ -137,7 +141,8 @@ Status XlaResource::GetOrCreateTensorArrayGradient(const string& source, new XlaResource(/*kind=*/kTensorArray, /*arg_num=*/-1, /*name=*/absl::StrCat("TensorArrayGrad: ", name_), type_, shape_, gradient_value, tensor_array_size_, - /*tensor_array_gradients=*/{})); + /*tensor_array_gradients=*/{}, + /*tensor_array_multiple_writes_aggregate=*/true)); } *gradient_out = gradient.get(); return Status::OK(); diff --git a/tensorflow/compiler/tf2xla/xla_resource.h b/tensorflow/compiler/tf2xla/xla_resource.h index 2438490be1..aa9ce1b171 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.h +++ b/tensorflow/compiler/tf2xla/xla_resource.h @@ -39,7 +39,8 @@ class XlaResource { XlaResource(Kind kind, int arg_num, string name, DataType type, TensorShape shape, const xla::XlaOp& initial_value, int64 tensor_array_size, - const std::set& tensor_array_gradients); + const std::set& tensor_array_gradients, + bool tensor_array_multiple_writes_aggregate); XlaResource(const XlaResource&) = delete; XlaResource(XlaResource&&) = delete; @@ -113,6 +114,8 @@ class XlaResource { const xla::XlaOp& pack, xla::XlaBuilder* builder); // TensorArray and Stack specific fields + // TODO(phawkins): refactor this code to use subclasses, rather than putting + // kind-specific fields in XlaResource. // 'tensor_array_size' stores the expected size of the TensorArray or Stack. 
// We need to store this since sometimes TensorArrays must be initialized @@ -121,6 +124,10 @@ class XlaResource { int64 tensor_array_size() const { return tensor_array_size_; } void set_tensor_array_size(int64 size) { tensor_array_size_ = size; } + bool tensor_array_multiple_writes_aggregate() const { + return tensor_array_multiple_writes_aggregate_; + } + // 'tensor_array_gradient' is a map from TensorArrayGradV3 'source' attributes // to an XlaResource containing the gradient TensorArrays. We store a pointer // here since there should only be one gradient TensorArray per 'source' @@ -143,6 +150,7 @@ class XlaResource { xla::XlaOp initial_value_; int64 tensor_array_size_ = -1; + bool tensor_array_multiple_writes_aggregate_ = false; std::map> tensor_array_gradients_; }; -- GitLab From a67fda82206665d195398f68c2b0fad49a4cfccd Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Wed, 10 Oct 2018 15:40:39 -0700 Subject: [PATCH 239/411] Only acquire the flex delegate when flex ops present PiperOrigin-RevId: 216605828 --- tensorflow/contrib/lite/model.cc | 37 +++++++++++++++++++++++++------- tensorflow/contrib/lite/model.h | 1 + 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index d7b109ac1a..a8a010be1a 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -384,6 +384,33 @@ TfLiteStatus InterpreterBuilder::ParseTensors( return status; } +TfLiteStatus InterpreterBuilder::ApplyDelegates(Interpreter* interpreter) { + // TODO(b/117561550): Move flex delegate application to the OpResolver. 
+ if (AcquireFlexDelegate == nullptr) { + return kTfLiteOk; + } + + bool has_flex_op = false; + for (const auto* registration : flatbuffer_op_index_to_registration_) { + if ((registration->builtin_code == BuiltinOperator_CUSTOM) && + IsFlexOp(registration->custom_name)) { + has_flex_op = true; + break; + } + } + + if (!has_flex_op) { + return kTfLiteOk; + } + + if (auto flex_delegate = AcquireFlexDelegate()) { + return interpreter->ModifyGraphWithDelegate(std::move(flex_delegate), + /*allow_dynamic_tensors=*/true); + } + + return kTfLiteOk; +} + TfLiteStatus InterpreterBuilder::operator()( std::unique_ptr* interpreter) { return operator()(interpreter, /*num_threads=*/-1); @@ -466,14 +493,8 @@ TfLiteStatus InterpreterBuilder::operator()( } (**interpreter).SetVariables(std::move(variables)); - // TODO(b/116667551): Only create the flex delegate if the model has flex ops. - if (AcquireFlexDelegate != nullptr) { - if (auto flex_delegate = AcquireFlexDelegate()) { - (**interpreter) - .ModifyGraphWithDelegate(std::move(flex_delegate), - /*allow_dynamic_tensors=*/true); - } - } + if (ApplyDelegates(interpreter->get()) != kTfLiteOk) + return cleanup_and_error(); return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 6abdfcd079..9505824dcc 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -173,6 +173,7 @@ class InterpreterBuilder { const flatbuffers::Vector>* buffers, const flatbuffers::Vector>* tensors, Interpreter* interpreter); + TfLiteStatus ApplyDelegates(Interpreter* interpreter); const ::tflite::Model* model_; const OpResolver& op_resolver_; -- GitLab From dd03b7d2a55b5501f3fcabc4ff0701ac2e9b3364 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 15:54:35 -0700 Subject: [PATCH 240/411] Don't crash an XRT server if a client leaks a compilation reference. 
PiperOrigin-RevId: 216608167 --- tensorflow/compiler/xrt/tests/raw_api_test.cc | 21 +++++++++++++++++++ .../compiler/xrt/xrt_compilation_cache.cc | 11 +++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index f590fbf0d9..9fc01e6304 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -437,6 +437,27 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) { EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); } +TEST(RawApiTest, LeakCompilationReference) { + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {2}); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {2}); + *shapes->mutable_result() = xla::ShapeUtil::MakeTupleShape( + {xla::ShapeUtil::MakeShape(xla::F32, {2})}); + StoreComputationSnapshot(AddAndTuple(), c.mutable_hlo_snapshot()); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + auto computation = + ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({c_handle}, &outputs)); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/xrt/xrt_compilation_cache.cc b/tensorflow/compiler/xrt/xrt_compilation_cache.cc index 4844c7fb71..31bb476895 100644 --- a/tensorflow/compiler/xrt/xrt_compilation_cache.cc +++ b/tensorflow/compiler/xrt/xrt_compilation_cache.cc @@ -46,12 +46,17 @@ XRTCompilationCache::XRTCompilationCache(int max_number_of_entries) XRTCompilationCache::~XRTCompilationCache() { VLOG(1) << "XRTCompilationCache::~XRTCompilationCache()"; + // A buggy client may be holding onto a reference, or a client might have + 
// crashed while holding onto a reference. In either case, discard all + // outstanding client references to avoid leaking storage. + for (const auto& entry : entries_by_uid_) { + while (!entry.second->RefCountIsOne()) { + entry.second->Unref(); + } + } while (!entries_by_last_use_.empty()) { MarkOldestEntryForEviction(); } - // By the time the cache is deleted all reference holders should have already - // been deleted, since they were holding references to the cache. So all - // entries should be gone at this point. CHECK_EQ(cache_.size(), 0); CHECK_EQ(entries_by_uid_.size(), 0); CHECK_EQ(cache_entries_, 0); -- GitLab From 883083715e6cfe400d0c0c08dff73b924cc1b72f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 10 Oct 2018 16:27:58 -0700 Subject: [PATCH 241/411] [XLA] Don't overestimate the amount of bytes a gather reads PiperOrigin-RevId: 216613367 --- .../compiler/xla/service/hlo_cost_analysis.cc | 5 ++++ .../xla/service/hlo_cost_analysis_test.cc | 25 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index a502fff9a0..23ab4cda93 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -664,6 +664,11 @@ Status HloCostAnalysis::HandleConditional(const HloInstruction* conditional) { } Status HloCostAnalysis::HandleGather(const HloInstruction* gather) { + // Gather doesn't read the whole input buffer, it's equivalent to a copy the + // size of the output shape and a read of the gather indices. + current_properties_[kBytesAccessedKey] = + GetShapeSize(gather->shape()) * 2 + + GetShapeSize(gather->operand(1)->shape()); // Gather does not issue any flops. 
return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc index d76ce9ecbc..802cdfc9e4 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc @@ -556,5 +556,30 @@ TEST_F(HloCostAnalysisTest, DynamicUpdateSlice) { EXPECT_EQ(analysis.bytes_accessed(), 8); } +TEST_F(HloCostAnalysisTest, Gather) { + // Test the analysis on a gather. + XlaBuilder builder("gather"); + Shape operand_shape = ShapeUtil::MakeShape(S32, {3, 3}); + Shape indices_shape = ShapeUtil::MakeShape(S32, {2}); + + auto operand = Parameter(&builder, 0, operand_shape, "operand"); + auto indices = Parameter(&builder, 1, indices_shape, "indices"); + GatherDimensionNumbers dim_numbers; + dim_numbers.add_offset_dims(1); + dim_numbers.add_collapsed_slice_dims(0); + dim_numbers.add_start_index_map(0); + dim_numbers.set_index_vector_dim(1); + Gather(operand, indices, dim_numbers, {1, 3}); + + auto hlo_module = BuildHloGraph(&builder); + + // Run HLO cost analysis. 
+ HloCostAnalysis analysis(ShapeSize); + ASSERT_IS_OK( + hlo_module->entry_computation()->root_instruction()->Accept(&analysis)); + + EXPECT_EQ(analysis.bytes_accessed(), 56); +} + } // namespace } // namespace xla -- GitLab From 09e098e5057ca1a781573a199726da750e90e4f3 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Wed, 10 Oct 2018 16:50:47 -0700 Subject: [PATCH 242/411] Automated rollback of commit d6a3d6a8295359364c86aecc479e6392bcde0ce4 PiperOrigin-RevId: 216617037 --- tensorflow/cc/BUILD | 36 ++++- tensorflow/core/graph/graph.cc | 2 +- tensorflow/core/grappler/optimizers/BUILD | 1 + .../optimizers/data/vectorization/BUILD | 38 +++-- .../data/vectorization/add_vectorizer.cc | 150 ++++++++++++++++++ .../optimizers/data/vectorization_utils.cc | 21 +-- .../data/vectorization_utils_test.cc | 103 ++++++++++-- .../optimization/map_vectorization_test.py | 1 + tensorflow/tensorflow.bzl | 9 +- 9 files changed, 325 insertions(+), 36 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index 9d2208d84d..c18b07603a 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -454,11 +454,33 @@ tf_cc_test( ], ) +# Generates separate libraries for array_ops and math_ops to reduce the dependency count of targets that depend on only these tf_gen_op_wrappers_cc( - name = "cc_ops", + name = "math_ops", + api_def_srcs = ["//tensorflow/core/api_def:base_api_def"], + op_lib_names = [ + "math_ops", + ], + pkg = "//tensorflow/core", +) + +tf_gen_op_wrappers_cc( + name = "array_ops", api_def_srcs = ["//tensorflow/core/api_def:base_api_def"], op_lib_names = [ "array_ops", + ], + pkg = "//tensorflow/core", +) + +tf_gen_op_wrappers_cc( + name = "cc_ops", + api_def_srcs = ["//tensorflow/core/api_def:base_api_def"], + deps_internal = [ + ":array_ops_internal", + ":math_ops_internal", + ], + op_lib_names = [ "audio_ops", "candidate_sampling_ops", "control_flow_ops", @@ -469,7 +491,6 
@@ tf_gen_op_wrappers_cc( "logging_ops", "lookup_ops", "manip_ops", - "math_ops", "nn_ops", "no_op", "parsing_ops", @@ -481,10 +502,21 @@ tf_gen_op_wrappers_cc( "user_ops", ], other_hdrs = [ + "ops/array_ops.h", "ops/const_op.h", + "ops/math_ops.h", "ops/standard_ops.h", ], + other_hdrs_internal = [ + "ops/array_ops_internal.h", + "ops/math_ops_internal.h", + ], pkg = "//tensorflow/core", + deps = [ + ":array_ops", + ":const_op", + ":math_ops", + ], ) tf_cc_test( diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 6f068546d2..a17491d4f7 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -34,7 +34,7 @@ namespace tensorflow { const int Graph::kControlSlot = -1; -class NodeProperties { +struct NodeProperties { public: NodeProperties(const OpDef* op_def, const NodeDef& node_def, const DataTypeSlice inputs, const DataTypeSlice outputs) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index e898377ded..43a7d6a70b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -113,6 +113,7 @@ tf_cc_test( deps = [ ":constant_folding", ":dependency_optimizer", + "//tensorflow/cc:array_ops_internal", "//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops_internal", "//tensorflow/core:all_kernels", diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD index 985d6c6c3a..cf84ac710a 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD +++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD @@ -9,7 +9,13 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all") VECTORIZER_DEPS = [ ":vectorizer_registry", + "//tensorflow/cc:ops", "//tensorflow/core/grappler/optimizers/data:graph_utils", + "//tensorflow/core:core_cpu", + "//tensorflow/cc:scope_internal", + "//tensorflow/cc:math_ops", + 
"//tensorflow/cc:array_ops", + "//tensorflow/cc:const_op", ] + tf_protos_all() cc_library( @@ -42,6 +48,26 @@ cc_library( ], ) +tf_cc_test( + name = "vectorizer_registry_test", + srcs = ["vectorizer_registry_test.cc"], + deps = [ + ":vectorizer_registry", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ] + tf_protos_all(), +) + +cc_library( + name = "add_vectorizer", + srcs = [ + "add_vectorizer.cc", + ], + deps = VECTORIZER_DEPS, + alwayslink = 1, +) + cc_library( name = "cast_vectorizer", srcs = ["cast_vectorizer.cc"], @@ -61,20 +87,10 @@ cc_library( hdrs = ["vectorizer_registry.h"], visibility = ["//visibility:public"], deps = [ + ":add_vectorizer", ":cast_vectorizer", ":unpack_vectorizer", ":vectorizer", ":vectorizer_registry", ], ) - -tf_cc_test( - name = "vectorizer_registry_test", - srcs = ["vectorizer_registry_test.cc"], - deps = [ - ":vectorizer_registry", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ] + tf_protos_all(), -) diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc new file mode 100644 index 0000000000..9f2679a5ef --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc @@ -0,0 +1,150 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope_internal.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/math_ops.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h" + +namespace tensorflow { +namespace grappler { + +namespace { + +const char* const kExpandDimsPrefix = "vectorized/expanddims/"; + +// Reshapes stacked inputs for broadcast. Stacked inputs have an extra leading +// dimension, which may cause automatic broadcasting rules to expand the +// input dimensions wrongly when the unstacked shapes have different ranks. +// To avoid that, we reshape stacked inputs to the maximum rank they need +// to be broadcasted to. +// +// For example, suppose we have inputs A and B, where A is a stacked tensor with +// shape [n, 5] (where n is the stack size) and B is an unstacked tensor with +// shape [12, 7, 5]. If we added them directly, tensorflow broadcasting rules +// would expand the dimensions of A to [1, n, 5], then (incorrectly) check that +// the dimensions n and 7 are compatible, and if so, create an output of shape +// [12, 7, 5]. However, correct addition of these inputs would create an output +// with shape [n, 12, 7, 5]: we need to manually expand the dimensions of A +// *after* the leading dimension, i.e. expand A to the shape [n, 1, 1, 5] before +// broadcasting. 
+Status ExpandDimsForBroadcast(std::vector* inputs, Graph* g) { + Status status; + Scope parent = NewInternalScope(g, &status, nullptr); + Scope s = parent.NewSubScope(kExpandDimsPrefix); + + // TODO(rachelim): We can potentially get rid of all these ops if shapes are + // known statically + + Output const_0 = ops::Const(s, 0); + Output const_1 = ops::Const(s, 1); + + std::vector ranks; + ranks.reserve(inputs->size()); + + // Get the stacked rank of each input + for (const auto& input : *inputs) { + Output rank = ops::Rank(s, Output(input.node, input.output_index)); + + if (!input.stacked) { + // If the input is unstacked, add 1 + rank = ops::Add(s, rank, const_1); + } + + ranks.push_back(rank); + } + + // Pack the ranks into one tensor to get the max + Output packed_ranks = ops::Stack(s, ranks); + + Output max_rank = + ops::Max(s, packed_ranks, const_0, ops::Max::Attrs().KeepDims(true)); + + std::vector expanded_inputs; + expanded_inputs.reserve(inputs->size()); + + // For all inputs that are stacked, expand dimensions after dim 0. 
+ for (size_t i = 0; i < inputs->size(); ++i) { + if (!inputs->at(i).stacked) { + expanded_inputs.push_back(inputs->at(i)); + continue; + } + + Output input(inputs->at(i).node, inputs->at(i).output_index); + + // Number of dimensions to expand + Output rank_diff = ops::Sub(s, max_rank, ranks[i]); + + // [1] * rank_diff + Output ones = ops::Tile(s, ops::Const(s, {1}), rank_diff); + + Output const_vec_1 = ops::Const(s, {1}); + + Output shape = ops::Shape(s, input); + + // shape[:1] + Output concat_pre = + ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1, + ops::StridedSlice::Attrs().BeginMask(1)); + + // shape[1:] + Output concat_post = + ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1, + ops::StridedSlice::Attrs().EndMask(1)); + + // tf.concat([shape[:1], ones, shape[1:]], 0) + Output new_shape = ops::Concat(s, {concat_pre, ones, concat_post}, const_0); + + Output result = ops::Reshape(s, input, new_shape); + + expanded_inputs.push_back({result.node(), 0, true}); + } + + inputs->swap(expanded_inputs); + return status; +} + +class AddVectorizer : public Vectorizer { + public: + Status Vectorize(const Node& node, Graph* outer_scope, + std::vector&& inputs, + std::vector* outputs) override { + if (node.num_inputs() != 2) { + return errors::Internal("Add op should only have two inputs."); + } + + TF_RETURN_IF_ERROR(ExpandDimsForBroadcast(&inputs, outer_scope)); + + // Add new Add node with the same op and attrs as the original node + Node* new_add_node; + TF_RETURN_IF_ERROR(NodeBuilder("Add", "Add") + .Input(inputs[0].node, inputs[0].output_index) + .Input(inputs[1].node, inputs[1].output_index) + .Finalize(outer_scope, &new_add_node)); + + // Add output mappings + outputs->push_back({new_add_node, 0, true}); + return Status::OK(); + } +}; + +REGISTER_VECTORIZER("Add", AddVectorizer); + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc 
b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc index d977ff3198..8b93b1f2b8 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc @@ -64,9 +64,18 @@ void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src, } } +// Update node attrs to keep its properties consistent with the function +void UpdateMapDefunAttrs(FunctionBody* map_defun_fn, Node* map_defun_node) { + map_defun_node->AddAttr("output_types", map_defun_fn->ret_types); + + // TODO(rachelim): Propagate precise shapes if they're known, which may enable + // subsequent optimizations. + map_defun_node->AddAttr("output_shapes", std::vector( + map_defun_fn->ret_types.size())); +} + Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node, const TensorDesc& output) { - // Note that we don't update MapDefun attrs as we go, only when we are done DataType type = output.first->output_type(output.second); int index = map_defun_fn->ret_nodes.size(); @@ -83,13 +92,13 @@ Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node, map_defun_fn->graph->AddEdge(output.first, output.second, ret_node, 0); map_defun_fn->ret_nodes.push_back(ret_node); map_defun_fn->ret_types.push_back(type); + UpdateMapDefunAttrs(map_defun_fn, map_defun_node); return s; } void RemoveMapDefunOutput(int output_position, Graph* outer_scope, FunctionBody* map_defun_fn, Node* map_defun_node) { - // Note that we don't update MapDefun attrs as we go, only when we are done DCHECK_LT(output_position, map_defun_fn->ret_nodes.size()) << "Trying to remove output that doesn't exist. 
Output number: " << output_position; @@ -102,6 +111,7 @@ void RemoveMapDefunOutput(int output_position, Graph* outer_scope, output_position); map_defun_fn->ret_types.erase(map_defun_fn->ret_types.begin() + output_position); + UpdateMapDefunAttrs(map_defun_fn, map_defun_node); // Renumber the nodes and edges that come after for (int i = 0; i < num_later_outputs; ++i) { @@ -342,13 +352,6 @@ void Vectorization::VectorizeHelper() { // need the MapDefun node and can delete it. if (map_defun_fn_->ret_nodes.empty()) { outer_scope_->RemoveNode(map_defun_node_); - } else { - // Update MapDefun node attrs accordingly - DCHECK_EQ(map_defun_fn_->ret_types.size(), map_defun_fn_->ret_nodes.size()); - map_defun_node_->AddAttr( - "output_shapes", - std::vector(map_defun_fn_->ret_types.size())); - map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types); } } diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc index a6020e36bb..be498d150b 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc @@ -145,7 +145,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) { FunctionDef* vectorized; Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized); LOG(ERROR) << s; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); EXPECT_EQ(GetRetval(*vectorized, 0), "ret0"); @@ -237,7 +237,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); auto map_defun_node = 
vectorized->node_def( function_utils::FindFunctionNodeWithOp("MapDefun", *vectorized)); @@ -311,7 +311,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -389,7 +389,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -475,7 +475,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& unpack_node = vectorized->node_def( @@ -574,7 +574,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); const NodeDef& cast_node = vectorized->node_def( @@ -654,7 +654,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) { *lib.add_function() = outer; 
*lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); // They should be unchanged // We check this somewhat manually as the names of nodes may have changed EXPECT_EQ(vectorized->node_def_size(), 1); @@ -738,7 +738,7 @@ TEST(VectorizeMapDefunTest, VectorizeConst) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized)); @@ -817,7 +817,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) { *lib.add_function() = outer; *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); EXPECT_TRUE( !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); auto const_node = vectorized->node_def( @@ -902,7 +902,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) { *lib.add_function() = inner; FunctionDef* vectorized; - EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok()); + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); auto find_const = [vectorized](int val) -> const NodeDef* { for (const auto& n : vectorized->node_def()) { @@ -924,6 +924,89 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) { EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name())); } +// Before: +// +// +------+ +// +-----------------+ Arg0 +----------------------+ +// | +---+--+ | +// | | | +// | +---v--+ | +// | +-------------+ Arg0 +------------------+ | +// | | +---+--+ | | +// | | | | 
| +// | | | +-----+ | | +// | | | |Const| | | +// | | | +-+---+ | | +// | | | | | | +// | | | +--------+ | | +// | | | | | | +// | | +-v---v-+ | | +// | | | Add | | | +// | | +-+-----+ | | +// | | | | | +// | | | | | +// | | MapDefun +-v----+ | | +// | +---------------| Ret |----------------+ | +// | +--v---+ | +// | | | +// | | | +// | +--v---- | +// +-------------------| Ret |--------------------+ +// +------+ +// +// +// After: +// +// +------+ +// +------------+ Arg0 +----------------------+ +// | +---+--+ | +// | | | +// | | +-----+ | +// | | |Const| | +// | +-v---------+ +--+--+ | +// | |ExpandDims*| | | +// | +-----+-----+ | | +// | | | | +// | +-----+ +-----+ | +// | | | | +// | +-v-v-+ | +// | | Add | | +// | +--+--+ | +// | | | +// | +---v--+ | +// +-----------------------+ Ret +-----------+ +// +------+ +// +TEST(VectorizeMapDefunTest, VectorizeDefunAdd) { + // Note that this checks that the "Add" vectorizer is successful, but does not + // check that the transformed function is correct (i.e. produces the same + // output as the unvectorized map defun). 
For the latter, the tests are in + // tensorflow/python/data/experimental/kernel_tests/optimization/ + // map_vectorization_test.py + FunctionDef inner = FunctionDefHelper::Create( + "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */}, + {/* nodes */ FunctionDefHelper::Const("Const", 2), + {{"Add"}, "Add", {"arg0", "Const:output:0"}, {{"T", DT_INT32}}}}, + {{"ret0", "Add:z:0"}}); + + FunctionDef outer = FunctionDefHelper::Create( + "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"}, + {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}}); + + NodeDef* map_defun = + AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}}, + inner.signature().name(), &outer); + CHECK_NOTNULL(map_defun); + + FunctionDefLibrary lib; + *lib.add_function() = outer; + *lib.add_function() = inner; + FunctionDef* vectorized; + TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized)); + EXPECT_TRUE( + !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized)); +} + // TODO(rachelim): More test cases when we get around to implementing them: // [] A badly defined converter, e.g. 
doesn't produce nodes that have the // same number of outputs/inputs as the nodes to be converted diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py index 803ff87924..d1d6cf28ab 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py @@ -80,6 +80,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase): ("Basic", lambda x: (x, x + 1), None), ("Const", lambda x: 2, 12), ("Parallel", lambda x: (x, x + 1), 12), + ("Broadcast", lambda x: x + np.random.rand(5, 4, 3, 2), None), ("Gather", lambda x: array_ops.gather(x, 0), 12), ) def testOptimization(self, map_fn, num_parallel_calls): diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index df15914233..53a382bd49 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -528,12 +528,15 @@ def tf_gen_op_wrappers_cc( op_lib_names = [], other_srcs = [], other_hdrs = [], + other_srcs_internal = [], + other_hdrs_internal = [], pkg = "", deps = [ clean_dep("//tensorflow/cc:ops"), clean_dep("//tensorflow/cc:scope"), clean_dep("//tensorflow/cc:const_op"), ], + deps_internal = [], op_gen = clean_dep("//tensorflow/cc:cc_op_gen_main"), include_internal_ops = 0, visibility = None, @@ -541,8 +544,8 @@ def tf_gen_op_wrappers_cc( api_def_srcs = []): subsrcs = other_srcs[:] subhdrs = other_hdrs[:] - internalsrcs = [] - internalhdrs = [] + internalsrcs = other_srcs_internal[:] + internalhdrs = other_hdrs_internal[:] for n in op_lib_names: tf_gen_op_wrapper_cc( n, @@ -577,7 +580,7 @@ def tf_gen_op_wrappers_cc( name = name + "_internal", srcs = internalsrcs, hdrs = internalhdrs, - deps = deps + if_not_android([ + deps = deps + deps_internal + if_not_android([ clean_dep("//tensorflow/core:core_cpu"), 
clean_dep("//tensorflow/core:framework"), clean_dep("//tensorflow/core:lib"), -- GitLab From 84f81b91a7bf938560ef40974dd672b95803db42 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Wed, 10 Oct 2018 16:55:25 -0700 Subject: [PATCH 243/411] Add HloModule::Clone overload which takes an HloModuleConfig. PiperOrigin-RevId: 216617647 --- tensorflow/compiler/xla/service/hlo_module.cc | 7 ++++++- tensorflow/compiler/xla/service/hlo_module.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 93e04eb3db..4b0612b368 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -558,8 +558,13 @@ std::vector HloModule::MakeNonfusionComputations() const { } std::unique_ptr HloModule::Clone(const string& suffix) const { + return Clone(config(), suffix); +} + +std::unique_ptr HloModule::Clone(const HloModuleConfig& config, + const string& suffix) const { VLOG(1) << "Cloning module :" << name_ << " --> " << suffix << "\n"; - auto module = absl::make_unique(name_ + "-" + suffix, config_); + auto module = absl::make_unique(name_ + "-" + suffix, config); HloCloneContext context(module.get(), suffix); auto cloned_computation = entry_computation_->Clone(suffix, &context); diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index a1ffdc4023..de6d3a13bf 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -92,6 +92,8 @@ class HloModule { // Returns a deep copy of this module including all computations. std::unique_ptr Clone(const string& suffix = "clone") const; + std::unique_ptr Clone(const HloModuleConfig& config, + const string& suffix = "clone") const; // Performs a deep clone of the computation, by recursively cloning all // the called computations as well. 
If the clone context is specified, it -- GitLab From 9bad98c61f27b60152119bb1c2cfd402c3bf7f3d Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 10 Oct 2018 16:59:33 -0700 Subject: [PATCH 244/411] [tf.data] Changing the background performance modeling to be on by default. PiperOrigin-RevId: 216618194 --- tensorflow/core/framework/model.cc | 32 +++++++++++++------ .../core/kernels/data/cache_dataset_ops.cc | 14 ++++---- .../assert_next_dataset_op_test.py | 12 ------- .../data/experimental/ops/prefetching_ops.py | 9 ++++++ tensorflow/python/data/ops/dataset_ops.py | 29 ++++++++--------- .../data/ops/multi_device_iterator_ops.py | 4 +++ 6 files changed, 57 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index bfdb3a6658..9684b736a7 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -59,9 +59,15 @@ int64 Model::Node::ProcessingTimeLocked() { return NanosPerElementLocked() + batch_size * ProcessingTimeForInputs(); } case Type::FILTER: { + if (inputs_.size() <= 1) { + return NanosPerElementLocked(); + } std::shared_ptr input = inputs_.front(); - double ratio = static_cast(input->num_elements()) / - static_cast(num_elements_); + double ratio = 0.0L; + if (num_elements_ > 0) { + ratio = static_cast(input->num_elements()) / + static_cast(num_elements_); + } return NanosPerElementLocked() + static_cast(ratio * static_cast(ProcessingTimeForInputs())); @@ -115,15 +121,21 @@ int64 Model::Node::OutputTimeLocked(std::vector* input_times) { batch_size * OutputTimeForInputs(input_times); } case Type::FILTER: { + if (inputs_.size() <= 1) { + return NanosPerElementLocked(); + } std::shared_ptr input = inputs_.front(); - int64 old_value = (*input_times)[input_times->size() - 1]; - double ratio = static_cast(input->num_elements()) / - static_cast(num_elements_); - (*input_times)[input_times->size() - 1] = static_cast( - static_cast(old_value + NanosPerElementLocked()) / 
ratio); - auto cleanup = gtl::MakeCleanup([input_times, old_value]() { - (*input_times)[input_times->size() - 1] = old_value; - }); + double ratio = 0.0L; + if (num_elements_ > 0) { + ratio = static_cast(input->num_elements()) / + static_cast(num_elements_); + int64 old_value = (*input_times)[input_times->size() - 1]; + (*input_times)[input_times->size() - 1] = static_cast( + static_cast(old_value + NanosPerElementLocked()) / ratio); + auto cleanup = gtl::MakeCleanup([input_times, old_value]() { + (*input_times)[input_times->size() - 1] = old_value; + }); + } return NanosPerElementLocked() + static_cast( static_cast(OutputTimeForInputs(input_times)) * ratio); diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc index 34c6c86538..f2419db3dc 100644 --- a/tensorflow/core/kernels/data/cache_dataset_ops.cc +++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc @@ -516,10 +516,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel { // `FileReaderIterator` and seek to the `cur_index`. 
switch (mode_) { case Mode::read: - iterator_.reset(new FileReaderIterator({dataset(), prefix()})); + iterator_.reset(new FileReaderIterator( + {dataset(), strings::StrCat(prefix(), "Impl")})); break; case Mode::write: - iterator_.reset(new FileWriterIterator({dataset(), prefix()})); + iterator_.reset(new FileWriterIterator( + {dataset(), strings::StrCat(prefix(), "Impl")})); } } @@ -866,12 +868,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel { void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) { switch (mode_) { case Mode::read: - iterator_.reset( - new MemoryReaderIterator({dataset(), prefix()}, cache_)); + iterator_.reset(new MemoryReaderIterator( + {dataset(), strings::StrCat(prefix(), "Impl")}, cache_)); break; case Mode::write: - iterator_.reset( - new MemoryWriterIterator({dataset(), prefix()}, cache_)); + iterator_.reset(new MemoryWriterIterator( + {dataset(), strings::StrCat(prefix(), "Impl")}, cache_)); } } diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py index 45b77b5c20..a138436fff 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py @@ -48,18 +48,6 @@ class AssertNextDatasetTest(test_base.DatasetTestBase): "Map transformation instead."): sess.run(get_next) - def testAssertNextShort(self): - dataset = dataset_ops.Dataset.from_tensors(0).apply( - optimization.assert_next(["Map", "Whoops"])).map(lambda x: x) - iterator = dataset.make_one_shot_iterator() - get_next = iterator.get_next() - - with self.cached_session() as sess: - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - "Asserted next 2 transformations but encountered only 1."): - sess.run(get_next) - if __name__ == "__main__": test.main() diff --git 
a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py index 48d7136f95..df082e9e35 100644 --- a/tensorflow/python/data/experimental/ops/prefetching_ops.py +++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py @@ -506,6 +506,15 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset): else: return super(_CopyToDeviceDataset, self).make_one_shot_iterator() + def make_initializable_iterator(self): + if self._is_gpu_target: + # TODO(b/116140813) : Enable dynamic optimizations. + options = dataset_ops.Options() + options.experimental_autotune = False + return self.with_options(options).make_initializable_iterator() + else: + return super(_CopyToDeviceDataset, self).make_initializable_iterator() + def _as_variant_tensor(self): with ops.device(self._target_device): return gen_dataset_ops.generator_dataset( diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index cdb883cac9..d7e37da48b 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -99,6 +99,16 @@ class Dataset(object): return options return Options() + def _apply_options(self): + dataset = self + options = self.options() + static_optimizations = options._static_optimizations() # pylint: disable=protected-access + if static_optimizations: + dataset = _OptimizeDataset(dataset, static_optimizations) + if options.experimental_autotune is not False: + dataset = _ModelDataset(dataset) + return dataset + def make_initializable_iterator(self, shared_name=None): """Creates an `Iterator` for enumerating the elements of this dataset. 
@@ -127,13 +137,7 @@ class Dataset(object): raise RuntimeError( "dataset.make_initializable_iterator is not supported when eager " "execution is enabled.") - dataset = self - options = self.options() - static_optimizations = options._static_optimizations() # pylint: disable=protected-access - if static_optimizations: - dataset = _OptimizeDataset(dataset, static_optimizations) - if options.experimental_autotune: - dataset = _ModelDataset(dataset) + dataset = self._apply_options() if shared_name is None: shared_name = "" if compat.forward_compatible(2018, 8, 3): @@ -163,7 +167,8 @@ class Dataset(object): RuntimeError: If eager execution is not enabled. """ if context.executing_eagerly(): - return iterator_ops.EagerIterator(self) + dataset = self._apply_options() + return iterator_ops.EagerIterator(dataset) else: raise RuntimeError("dataset.__iter__() is only supported when eager " "execution is enabled.") @@ -194,13 +199,7 @@ class Dataset(object): core_random_seed.set_random_seed( (graph_level_seed + 87654321 * op_level_seed) % (2 ** 63 - 1)) - dataset = self - options = self.options() - static_optimizations = options._static_optimizations() # pylint: disable=protected-access - if static_optimizations: - dataset = _OptimizeDataset(dataset, static_optimizations) - if options.experimental_autotune: - dataset = _ModelDataset(dataset) + dataset = self._apply_options() return dataset._as_variant_tensor() # pylint: disable=protected-access try: diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py index b7d3aac206..3bcc20b333 100644 --- a/tensorflow/python/data/ops/multi_device_iterator_ops.py +++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py @@ -206,6 +206,10 @@ class MultiDeviceIterator(object): i, self._multi_device_iterator_resource, self._incarnation_id, self._source_device_tensor, device, self._dataset.output_shapes, self._dataset.output_types, self._dataset.output_classes) + # 
TODO(b/116140813) : Enable dynamic optimizations. + options = dataset_ops.Options() + options.experimental_autotune = False + ds = ds.with_options(options) if prefetch_buffer_size > 0: ds = ds.prefetch(prefetch_buffer_size) with ops.device(device): -- GitLab From 128903381b93289b1d19fef255b939d30339727c Mon Sep 17 00:00:00 2001 From: Russell Power Date: Wed, 10 Oct 2018 17:00:49 -0700 Subject: [PATCH 245/411] Turn on worker watchdog in TPUEstimator. PiperOrigin-RevId: 216618378 --- .../contrib/tpu/python/tpu/session_support.py | 18 ++++++++++++++++++ .../contrib/tpu/python/tpu/tpu_estimator.py | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py index 05264f5a46..8248256373 100644 --- a/tensorflow/contrib/tpu/python/tpu/session_support.py +++ b/tensorflow/contrib/tpu/python/tpu/session_support.py @@ -35,6 +35,8 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util +_WATCHDOG = None + class CoordinatorShutdownException(Exception): """Raised when the coordinator needs to shutdown.""" @@ -256,6 +258,22 @@ class WatchdogManager(threading.Thread): time.sleep(self.ping_interval) +def start_worker_watchdog(session, + devices=None, + ping_interval=60, + shutdown_timeout=3600): + """Start global worker watchdog to shutdown workers on coordinator exit.""" + global _WATCHDOG + if _WATCHDOG is None: + # Ensure we can send a few pings before we timeout! + ping_interval = min(shutdown_timeout / 10., ping_interval) + logging.info('Enabling watchdog timer with %d second timeout', + shutdown_timeout) + _WATCHDOG = WatchdogManager(session, devices, ping_interval, + shutdown_timeout) + _WATCHDOG.configure_and_run() + + class GracefulShutdownHook(session_run_hook.SessionRunHook): """Session hook that watches for shutdown events. 
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 367606ef27..23c30e3f06 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -480,6 +480,12 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): self._outfeed_controller = _OpQueueContext( name='OutfeedController', target=self._run_outfeed, args=(session,)) + # Enable the worker watchdog to terminate workers on coordinator exit. + watchdog_timeout = int(os.environ.get('TF_TPU_WATCHDOG_TIMEOUT', '0')) + if watchdog_timeout > 0: + session_support.start_worker_watchdog(session, + shutdown_timeout=watchdog_timeout) + def before_run(self, run_context): self._feed_error = None -- GitLab From 0be7b32fa4db37fe7e4a12ab12f87796ec07e54f Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 10 Oct 2018 17:03:27 -0700 Subject: [PATCH 246/411] [XLA:GPU] Rename cudnn convolution passes. Make them shorter and more consistent. - CudnnConvolutionFoo -> CudnnConvFoo - PadInsertion -> CudnnConvPaddingLegalization - PadForTensorCores -> CudnnConvPadForSpeed (padding channel dimensions from 3 -> 4 is not a tensor-cores-related optimization and ideally should be run on P100s as well). 
PiperOrigin-RevId: 216618934 --- tensorflow/compiler/xla/service/gpu/BUILD | 64 +++++++++---------- .../xla/service/gpu/convolution_thunk.cc | 8 +-- .../xla/service/gpu/convolution_thunk.h | 2 +- ...cker.cc => cudnn_conv_algorithm_picker.cc} | 21 +++--- ...picker.h => cudnn_conv_algorithm_picker.h} | 17 +++-- ...r_cores.cc => cudnn_conv_pad_for_speed.cc} | 6 +- ...sor_cores.h => cudnn_conv_pad_for_speed.h} | 13 ++-- ...st.cc => cudnn_conv_pad_for_speed_test.cc} | 28 ++++---- ....cc => cudnn_conv_padding_legalization.cc} | 14 ++-- ...on.h => cudnn_conv_padding_legalization.h} | 12 ++-- ...ion_rewriter.cc => cudnn_conv_rewriter.cc} | 30 +++++---- ...ution_rewriter.h => cudnn_conv_rewriter.h} | 12 ++-- ...er_test.cc => cudnn_conv_rewriter_test.cc} | 45 ++++++------- ...olution_runner.cc => cudnn_conv_runner.cc} | 47 +++++++------- ...nvolution_runner.h => cudnn_conv_runner.h} | 29 ++++----- ...writer.cc => cudnn_fused_conv_rewriter.cc} | 4 +- ...rewriter.h => cudnn_fused_conv_rewriter.h} | 8 +-- .../xla/service/gpu/ir_emission_utils.h | 4 +- .../xla/service/gpu/ir_emitter_unnested.cc | 2 +- .../xla/service/gpu/nvptx_compiler.cc | 35 +++++----- .../compiler/xla/service/gpu/tests/BUILD | 4 +- ...t.cc => cudnn_fused_conv_rewriter_test.cc} | 23 ++++--- 22 files changed, 214 insertions(+), 214 deletions(-) rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_algorithm_picker.cc => cudnn_conv_algorithm_picker.cc} (95%) rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_algorithm_picker.h => cudnn_conv_algorithm_picker.h} (78%) rename tensorflow/compiler/xla/service/gpu/{pad_for_tensor_cores.cc => cudnn_conv_pad_for_speed.cc} (98%) rename tensorflow/compiler/xla/service/gpu/{pad_for_tensor_cores.h => cudnn_conv_pad_for_speed.h} (72%) rename tensorflow/compiler/xla/service/gpu/{pad_for_tensor_cores_test.cc => cudnn_conv_pad_for_speed_test.cc} (86%) rename tensorflow/compiler/xla/service/gpu/{pad_insertion.cc => cudnn_conv_padding_legalization.cc} (97%) 
rename tensorflow/compiler/xla/service/gpu/{pad_insertion.h => cudnn_conv_padding_legalization.h} (78%) rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_rewriter.cc => cudnn_conv_rewriter.cc} (95%) rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_rewriter.h => cudnn_conv_rewriter.h} (74%) rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_rewriter_test.cc => cudnn_conv_rewriter_test.cc} (95%) rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_runner.cc => cudnn_conv_runner.cc} (90%) rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_runner.h => cudnn_conv_runner.h} (67%) rename tensorflow/compiler/xla/service/gpu/{cudnn_fused_convolution_rewriter.cc => cudnn_fused_conv_rewriter.cc} (98%) rename tensorflow/compiler/xla/service/gpu/{cudnn_fused_convolution_rewriter.h => cudnn_fused_conv_rewriter.h} (77%) rename tensorflow/compiler/xla/service/gpu/tests/{cudnn_fused_convolution_rewriter_test.cc => cudnn_fused_conv_rewriter_test.cc} (93%) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 62da43d68a..ea285994be 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -154,7 +154,7 @@ cc_library( deps = [ ":backend_configs", ":buffer_allocations", - ":cudnn_convolution_runner", + ":cudnn_conv_runner", ":elemental_ir_emitter", ":gpu_constants", ":gpu_executable", @@ -323,7 +323,7 @@ cc_library( ], deps = [ ":buffer_allocations", - ":cudnn_convolution_runner", + ":cudnn_conv_runner", ":hlo_execution_profiler", ":infeed_manager", ":ir_emission_utils", @@ -385,13 +385,13 @@ cc_library( ) cc_library( - name = "cudnn_convolution_algorithm_picker", - srcs = ["cudnn_convolution_algorithm_picker.cc"], - hdrs = ["cudnn_convolution_algorithm_picker.h"], + name = "cudnn_conv_algorithm_picker", + srcs = ["cudnn_conv_algorithm_picker.cc"], + hdrs = ["cudnn_conv_algorithm_picker.h"], deps = [ ":backend_configs", 
":buffer_comparator", - ":cudnn_convolution_runner", + ":cudnn_conv_runner", ":gpu_executable", ":ir_emission_utils", "//tensorflow/compiler/xla:literal_util", @@ -410,9 +410,9 @@ cc_library( ) cc_library( - name = "cudnn_convolution_runner", - srcs = ["cudnn_convolution_runner.cc"], - hdrs = ["cudnn_convolution_runner.h"], + name = "cudnn_conv_runner", + srcs = ["cudnn_conv_runner.cc"], + hdrs = ["cudnn_conv_runner.h"], deps = [ ":backend_configs", ":ir_emission_utils", @@ -432,9 +432,9 @@ cc_library( ) cc_library( - name = "cudnn_convolution_rewriter", - srcs = ["cudnn_convolution_rewriter.cc"], - hdrs = ["cudnn_convolution_rewriter.h"], + name = "cudnn_conv_rewriter", + srcs = ["cudnn_conv_rewriter.cc"], + hdrs = ["cudnn_conv_rewriter.h"], deps = [ ":backend_configs", ":ir_emission_utils", @@ -449,10 +449,10 @@ cc_library( ) tf_cc_test( - name = "cudnn_convolution_rewriter_test", - srcs = ["cudnn_convolution_rewriter_test.cc"], + name = "cudnn_conv_rewriter_test", + srcs = ["cudnn_conv_rewriter_test.cc"], deps = [ - ":cudnn_convolution_rewriter", + ":cudnn_conv_rewriter", ":ir_emission_utils", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", @@ -581,9 +581,9 @@ tf_cc_test( ) cc_library( - name = "pad_insertion", - srcs = ["pad_insertion.cc"], - hdrs = ["pad_insertion.h"], + name = "cudnn_conv_padding_legalization", + srcs = ["cudnn_conv_padding_legalization.cc"], + hdrs = ["cudnn_conv_padding_legalization.h"], deps = [ ":ir_emission_utils", "//tensorflow/compiler/xla:literal", @@ -600,9 +600,9 @@ cc_library( ) cc_library( - name = "pad_for_tensor_cores", - srcs = ["pad_for_tensor_cores.cc"], - hdrs = ["pad_for_tensor_cores.h"], + name = "cudnn_conv_pad_for_speed", + srcs = ["cudnn_conv_pad_for_speed.cc"], + hdrs = ["cudnn_conv_pad_for_speed.h"], deps = [ ":ir_emission_utils", "//tensorflow/compiler/xla:literal_util", @@ -614,11 +614,11 @@ cc_library( ) tf_cc_test( - name = "pad_for_tensor_cores_test", - srcs = 
["pad_for_tensor_cores_test.cc"], + name = "cudnn_conv_pad_for_speed_test", + srcs = ["cudnn_conv_pad_for_speed_test.cc"], deps = [ + ":cudnn_conv_pad_for_speed", ":ir_emission_utils", - ":pad_for_tensor_cores", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:hlo_matchers", @@ -660,9 +660,11 @@ cc_library( srcs = ["nvptx_compiler.cc"], hdrs = ["nvptx_compiler.h"], deps = [ - ":cudnn_convolution_algorithm_picker", - ":cudnn_convolution_rewriter", - ":cudnn_fused_convolution_rewriter", + ":cudnn_conv_algorithm_picker", + ":cudnn_conv_pad_for_speed", + ":cudnn_conv_padding_legalization", + ":cudnn_conv_rewriter", + ":cudnn_fused_conv_rewriter", ":fusion_merger", ":gpu_constants", ":gpu_copy_insertion", @@ -674,8 +676,6 @@ cc_library( ":ir_emission_utils", ":ir_emitter", ":multi_output_fusion", - ":pad_for_tensor_cores", - ":pad_insertion", ":partition_assignment", ":stream_assignment", ":stream_executor_util", @@ -966,9 +966,9 @@ tf_cc_test( ) cc_library( - name = "cudnn_fused_convolution_rewriter", - srcs = ["cudnn_fused_convolution_rewriter.cc"], - hdrs = ["cudnn_fused_convolution_rewriter.h"], + name = "cudnn_fused_conv_rewriter", + srcs = ["cudnn_fused_conv_rewriter.cc"], + hdrs = ["cudnn_fused_conv_rewriter.h"], deps = [ ":backend_configs", ":ir_emission_utils", diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index 4effea637d..e1dffad304 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -18,7 +18,7 @@ limitations under the License. 
#include #include "absl/strings/str_cat.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/types.h" @@ -56,9 +56,9 @@ Status ConvolutionThunk::ExecuteOnStream( buffer_allocations.GetDeviceAddress(scratch_buffer_); auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction()); - TF_RETURN_IF_ERROR(RunCudnnConvolution(cudnn_call_, - absl::MakeSpan(operand_se_buffers), - result_buffer, scratch, stream)); + TF_RETURN_IF_ERROR(RunCudnnConv(cudnn_call_, + absl::MakeSpan(operand_se_buffers), + result_buffer, scratch, stream)); void* ptrs[] = {result_buffer.opaque(), scratch.opaque()}; se::DeviceMemory tuple_addr( diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h index f53bc54198..c71515490c 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h @@ -19,7 +19,7 @@ limitations under the License. 
#include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h" #include "tensorflow/compiler/xla/service/gpu/thunk.h" diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc similarity index 95% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc index 6d4a72038f..6d6780fa1c 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/types/optional.h" @@ -145,9 +145,8 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) { // cache misses and doing extra work. Overall, caching doesn't seem worth the // trouble, but we may want to revisit this if we ever find a model where // caching would speed up compilation a lot. 
-StatusOr -CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( - HloCustomCallInstruction* instr) { +StatusOr +CudnnConvAlgorithmPicker::PickBestAlgorithm(HloCustomCallInstruction* instr) { // TODO(timshen): for now only check fp16. It can be expanded to other types, // with some work on the HLO routines. const bool cross_check_enabled = @@ -253,10 +252,10 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( backend_config.set_algorithm(alg.algo_id()); backend_config.set_tensor_ops_enabled(alg.tensor_ops_enabled()); TF_RETURN_IF_ERROR(instr->set_backend_config(backend_config)); - bool launch_ok = RunCudnnConvolution(instr, absl::MakeSpan(operand_buffers), - result_buffer, &scratch_allocator, - &stream, &profile_result) - .ok(); + bool launch_ok = + RunCudnnConv(instr, absl::MakeSpan(operand_buffers), result_buffer, + &scratch_allocator, &stream, &profile_result) + .ok(); if (launch_ok && profile_result.is_valid()) { const bool crash_on_checking_failure = @@ -328,7 +327,7 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( instr->ToString()); } -StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( +StatusOr CudnnConvAlgorithmPicker::RunOnInstruction( HloInstruction* instr) { CHECK(IsCustomCallToDnnConvolution(*instr)); @@ -378,7 +377,7 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( return true; } -StatusOr CudnnConvolutionAlgorithmPicker::RunOnComputation( +StatusOr CudnnConvAlgorithmPicker::RunOnComputation( HloComputation* computation) { std::vector convs; for (auto* instr : computation->instructions()) { @@ -395,7 +394,7 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnComputation( return changed; } -StatusOr CudnnConvolutionAlgorithmPicker::Run(HloModule* module) { +StatusOr CudnnConvAlgorithmPicker::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation)); diff --git 
a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h similarity index 78% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h index 136c32210a..642af787af 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_ #include "absl/time/time.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -31,18 +31,17 @@ namespace gpu { // Modifies CustomCalls to cudnn convolutions, choosing the best algorithm for // each and adding explicit scratch space to the CustomCalls. -class CudnnConvolutionAlgorithmPicker : public HloModulePass { +class CudnnConvAlgorithmPicker : public HloModulePass { public: // If the `allocator` parameter is not null, we will use it to allocate temp // memory while timing the various convolution algorithms. 
If it's null, // we'll use the default allocator on the StreamExecutor. - CudnnConvolutionAlgorithmPicker(se::StreamExecutor* stream_exec, - DeviceMemoryAllocator* allocator, - Compiler* compiler) + CudnnConvAlgorithmPicker(se::StreamExecutor* stream_exec, + DeviceMemoryAllocator* allocator, Compiler* compiler) : stream_exec_(stream_exec), allocator_(allocator), compiler_(compiler) {} absl::string_view name() const override { - return "cudnn-convolution-algorithm-picker"; + return "cudnn-conv-algorithm-picker"; } StatusOr Run(HloModule* module) override; @@ -67,4 +66,4 @@ class CudnnConvolutionAlgorithmPicker : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc similarity index 98% rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc index 8f1f5a7bf5..24b1f1af27 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" @@ -108,7 +108,7 @@ static HloInstruction* PadInstruction(HloInstruction* instr, static StatusOr PadFeaturesDims(HloCustomCallInstruction* conv) { CHECK_EQ(0, conv->shape().tuple_shapes(1).dimensions(0)) << "conv must use 0 scratch bytes, i.e. this pass must be run " - "before CudnnConvolutionAlgorithmPicker."; + "before CudnnConvAlgorithmPicker."; TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); const auto& dnums = conv->convolution_dimension_numbers(); @@ -252,7 +252,7 @@ static std::vector GetRelevantConvs( return convs; } -StatusOr PadForTensorCores::Run(HloModule* module) { +StatusOr CudnnConvPadForSpeed::Run(HloModule* module) { bool changed = false; for (HloComputation* comp : module->MakeNonfusionComputations()) { for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) { diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h similarity index 72% rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h index e592a3774e..89a894e9d3 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_ #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -29,10 +29,13 @@ namespace gpu { // opposite of useful on other GPUs, so you should check what GPU you're // targeting before running this pass. // +// TODO(jlebar): Rework this. For one thing, it should not be Volta-only. +// Padding input channels 3 to 4 is (we think) applicable to Pascal as well. +// // TODO(jlebar): Also pad dots. -class PadForTensorCores : public HloModulePass { +class CudnnConvPadForSpeed : public HloModulePass { public: - absl::string_view name() const override { return "pad for tensor cores"; } + absl::string_view name() const override { return "cudnn-conv-pad-for-speed"; } StatusOr Run(HloModule* module) override; }; @@ -40,4 +43,4 @@ class PadForTensorCores : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_ diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc similarity index 86% rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc index 5c92b0dcb8..ec403021e6 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" @@ -29,9 +29,9 @@ namespace { namespace op = xla::testing::opcode_matchers; using ::testing::_; -class PadForTensorCoresTest : public HloVerifiedTestBase {}; +class CudnnConvPadForSpeedTest : public HloVerifiedTestBase {}; -TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) { +TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvInputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -42,7 +42,7 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); SCOPED_TRACE(module().ToString()); @@ -55,7 +55,7 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) { ShapeUtil::MakeShape(F16, {2, 2, 48, 40}))); } -TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { +TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvOutputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -66,7 +66,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardInput" })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::CustomCall(kCudnnConvBackwardInputCallTarget, op::Pad(op::Parameter(0), _), @@ -77,7 +77,7 @@ 
TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { ShapeUtil::MakeShape(F16, {2, 2, 40, 48}))); } -TEST_F(PadForTensorCoresTest, PadF16ForwardConvOutputChannels) { +TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvOutputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -88,7 +88,7 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvOutputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::Tuple(op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvForwardCallTarget, op::Parameter(0), @@ -96,7 +96,7 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvOutputChannels) { _)); } -TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { +TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvInputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -108,7 +108,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { custom_call_target="__cudnn$convBackwardInput" ROOT gte = f16[10,20,30,41] get-tuple-element(result), index=0 })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( @@ -117,7 +117,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { _))); } -TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { +TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvInputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -129,7 +129,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { custom_call_target="__cudnn$convBackwardFilter" 
ROOT gte = f16[2,2,41,40] get-tuple-element(result), index=0 })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( @@ -138,7 +138,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { _))); } -TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { +TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvOutputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -150,7 +150,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { custom_call_target="__cudnn$convBackwardFilter" ROOT gte = f16[2,2,40,41] get-tuple-element(result), index=0 })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc similarity index 97% rename from tensorflow/compiler/xla/service/gpu/pad_insertion.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc index ae7abca7c6..d7829045cc 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/pad_insertion.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h" #include "absl/memory/memory.h" #include "tensorflow/compiler/xla/literal.h" @@ -132,7 +132,8 @@ HloInstruction* MaybePaddedKernel(const Window& conv_window, } } // namespace -bool PadInsertion::CanonicalizeForwardConvolution(HloInstruction* conv) { +bool CudnnConvPaddingLegalization::CanonicalizeForwardConvolution( + HloInstruction* conv) { if (IsForwardConvolutionCanonical(*conv)) { return false; } @@ -187,7 +188,7 @@ void IncreasePaddingHighBy(int64 delta, WindowDimension* window_dim) { } } // namespace -bool PadInsertion::CanonicalizeBackwardFilterConvolution( +bool CudnnConvPaddingLegalization::CanonicalizeBackwardFilterConvolution( HloInstruction* backward_conv) { CHECK_EQ(backward_conv->custom_call_target(), kCudnnConvBackwardFilterCallTarget); @@ -260,7 +261,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution( return true; } -bool PadInsertion::CanonicalizeBackwardInputConvolution( +bool CudnnConvPaddingLegalization::CanonicalizeBackwardInputConvolution( HloInstruction* backward_conv) { if (window_util::HasSymmetricPadding(backward_conv->window())) { return false; @@ -377,7 +378,8 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution( return true; } -StatusOr PadInsertion::RunOnComputation(HloComputation* computation) { +StatusOr CudnnConvPaddingLegalization::RunOnComputation( + HloComputation* computation) { bool changed = false; std::vector convs; for (auto* instr : computation->instructions()) { @@ -402,7 +404,7 @@ StatusOr PadInsertion::RunOnComputation(HloComputation* computation) { return changed; } -StatusOr PadInsertion::Run(HloModule* module) { +StatusOr CudnnConvPaddingLegalization::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { 
TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation)); diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h similarity index 78% rename from tensorflow/compiler/xla/service/gpu/pad_insertion.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h index 25cdf64c4c..7d1b075517 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_ #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -24,9 +24,11 @@ namespace gpu { // An HLO pass that canonicalizes convolution instructions for GPU codegen. It // inserts Pad instructions before Convolution instructions with uncanonicalized // padding, so that they can be lowered to cuDNN convolution. 
-class PadInsertion : public HloModulePass { +class CudnnConvPaddingLegalization : public HloModulePass { public: - absl::string_view name() const override { return "pad insertion"; } + absl::string_view name() const override { + return "cudnn-conv-padding-legalization"; + } StatusOr Run(HloModule* module) override; @@ -41,4 +43,4 @@ class PadInsertion : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_ diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc similarity index 95% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc index 437d25727e..5cea66de38 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h" #include #include @@ -188,9 +188,9 @@ std::tuple MatchBackwardFilter( // the amount of high padding the same as the amount of low padding as long // as it is between min_padding_high and max_padding_high. If it is not in // that range, we pick the one that's closest to dim->padding_low() and let - // PadInsertion canonicalize the resultant backward convolution later. - // Picking the closest one minimizes the cost of the kPad instruction to be - // inserted by PadInsertion. + // CudnnConvPaddingLegalization canonicalize the resultant backward + // convolution later. 
Picking the closest one minimizes the cost of the kPad + // instruction to be inserted by CudnnConvPaddingLegalization. if (dim->padding_low() >= min_padding_high && dim->padding_low() <= max_padding_high) { dim->set_padding_high(dim->padding_low()); @@ -207,7 +207,8 @@ std::tuple MatchBackwardFilter( "negative padding (" << dim->padding_high() << ") on right/bottom of the weight gradients, which is not " - "supported by PadInsertion (b/32744257). Falling back to " + "supported by CudnnConvPaddingLegalization (b/32744257). " + "Falling back to " "unfused convolution for instruction: " << conv->ToString(); return no_match_result; @@ -342,7 +343,8 @@ MatchBackwardInput(HloInstruction* conv) { LOG(ERROR) << "The low padding of the backward convolution would be negative (" << backward_padding_low - << "), which isn't supported by PadInsertion for now (b/32744257)."; + << "), which isn't supported by CudnnConvPaddingLegalization " + "for now (b/32744257)."; return no_match_result; } dim->set_padding_low(backward_padding_low); @@ -371,8 +373,8 @@ MatchBackwardInput(HloInstruction* conv) { dim->set_padding_high(backward_padding_low); } else { // Otherwise, we choose the amount that's closest to backward_padding_low, - // and PadInsertion will later insert kSlice instructions to enforce even - // padding. + // and CudnnConvPaddingLegalization will later insert kSlice + // instructions to enforce even padding. // // For example, consider the backward convolution pattern // @@ -398,9 +400,9 @@ MatchBackwardInput(HloInstruction* conv) { dim->set_padding_high(max_padding_high); } } - // PadInsertion doesn't handle backward input convolution with negative - // padding for now. So fall back to unfused convolution in case of negative - // padding. For example, + // CudnnConvPaddingLegalization doesn't handle backward input + // convolution with negative padding for now. So fall back to unfused + // convolution in case of negative padding. 
For example, // ABCD = Conv(abc, reverse(xy), padding_high=2) // could be fused to // ABCD = BackwardInputConv(abc, xy, padding_low=1, padding_high=-1) @@ -410,8 +412,8 @@ MatchBackwardInput(HloInstruction* conv) { "negative padding (" << dim->padding_high() << ") on right/bottom of the activations, which is not " - "supported by PadInsertion (b/32744257). Falling back to " - "unfused convolution for instruction: " + "supported by CudnnConvPaddingLegalization (b/32744257). " + "Falling back to unfused convolution for instruction: " << conv->ToString(); return no_match_result; } @@ -555,7 +557,7 @@ StatusOr RunOnComputation(HloComputation* computation) { } } // namespace -StatusOr CudnnConvolutionRewriter::Run(HloModule* module) { +StatusOr CudnnConvRewriter::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation)); diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h similarity index 74% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h index 8d7c6fdab5..d8ec72c27b 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_ #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -24,11 +24,9 @@ namespace gpu { // Rewrites plain convolutions, backwards-filter convolutions, and // backwards-input convolutions into CustomCall HLOs that call into cuDNN. -class CudnnConvolutionRewriter : public HloModulePass { +class CudnnConvRewriter : public HloModulePass { public: - absl::string_view name() const override { - return "cudnn-convolution-rewriter"; - } + absl::string_view name() const override { return "cudnn-conv-rewriter"; } StatusOr Run(HloModule* module) override; }; @@ -36,4 +34,4 @@ class CudnnConvolutionRewriter : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc similarity index 95% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc index d237f8930b..543160df8b 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -34,9 +34,9 @@ namespace { namespace op = xla::testing::opcode_matchers; using ::testing::_; -class CudnnConvolutionRewriterTest : public HloVerifiedTestBase { +class CudnnConvRewriterTest : public HloVerifiedTestBase { public: - CudnnConvolutionRewriterTest() + CudnnConvRewriterTest() : HloVerifiedTestBase(/*layout_sensitive=*/true, /*allow_mixed_precision=*/false) { for (int i = 0; i < 2; ++i) { @@ -85,7 +85,7 @@ class CudnnConvolutionRewriterTest : public HloVerifiedTestBase { protected: bool RunPass(HloModule* module) { - return CudnnConvolutionRewriter().Run(module).ValueOrDie(); + return CudnnConvRewriter().Run(module).ValueOrDie(); } // A convolution window with stride 1 and zero padding. The size fields are @@ -95,7 +95,7 @@ class CudnnConvolutionRewriterTest : public HloVerifiedTestBase { ConvolutionDimensionNumbers tf_default_dnums_for_backward_input_; }; -TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolve) { +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolve) { HloComputation::Builder builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -123,7 +123,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolve) { op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0)); } -TEST_F(CudnnConvolutionRewriterTest, +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveEquivalentToForwardConvolution) { HloComputation::Builder builder(TestName()); HloInstruction* activations = @@ -152,8 +152,7 @@ TEST_F(CudnnConvolutionRewriterTest, } // Extracted from block35 training. 
-TEST_F(CudnnConvolutionRewriterTest, - BackwardFilterConvolveWithPaddedActivations) { +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithPaddedActivations) { auto builder = HloComputation::Builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -183,8 +182,7 @@ TEST_F(CudnnConvolutionRewriterTest, } // Extracted from inception v3 training. -TEST_F(CudnnConvolutionRewriterTest, - BackwardFilterConvolveWithPaddedGradients) { +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithPaddedGradients) { auto builder = HloComputation::Builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -213,7 +211,7 @@ TEST_F(CudnnConvolutionRewriterTest, op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0)); } -TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolveWithUnevenPadding) { +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithUnevenPadding) { auto builder = HloComputation::Builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -242,7 +240,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolveWithUnevenPadding) { op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0)); } -TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveEvenPadding) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveEvenPadding) { auto builder = HloComputation::Builder(TestName()); HloInstruction* output = builder.AddInstruction(HloInstruction::CreateParameter( @@ -307,7 +305,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveEvenPadding) { // Convolve([abc], [x], base_dilation=2) // = Convolve([abc], Reverse([x]), base_dilation=2) // = BackwardInputConvolve([abc], [x], stride=2) -TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolve1x1Filter) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolve1x1Filter) { auto builder = HloComputation::Builder(TestName()); // NHWC dimension order. 
HloInstruction* output = @@ -341,7 +339,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolve1x1Filter) { // BackwardInputConvolve([abc], [x], stride=1) is equivalent to // ForwardConvolve([abc], [x], stride=1). No need to fold it into backward input // convolution. -TEST_F(CudnnConvolutionRewriterTest, +TEST_F(CudnnConvRewriterTest, BackwardInputConvolve1x1FilterEquivalentToForwardConvolve) { auto builder = HloComputation::Builder(TestName()); // NHWC dimension order. @@ -385,8 +383,7 @@ TEST_F(CudnnConvolutionRewriterTest, // 20x10x10x192 // // Gradients are padded unevenly. -TEST_F(CudnnConvolutionRewriterTest, - BackwardInputConvolveUnevenPaddingOnGradients) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveUnevenPaddingOnGradients) { auto builder = HloComputation::Builder(TestName()); HloInstruction* output = builder.AddInstruction(HloInstruction::CreateParameter( @@ -436,7 +433,7 @@ TEST_F(CudnnConvolutionRewriterTest, // Similar to BackwardInputConvolveUnevenPadding, but the low padding of the // gradients exceeds kernel_size - 1. Therefore, this pattern cannot be fused. -TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveLowPaddingTooLarge) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveLowPaddingTooLarge) { auto builder = HloComputation::Builder(TestName()); HloInstruction* output = builder.AddInstruction(HloInstruction::CreateParameter( @@ -488,9 +485,8 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveLowPaddingTooLarge) { // padding_low=2, padding_high=1, base_dilation=2) // // We should fuse BC even though padding on activations is uneven, because -// PadInsertion will canonicalize the fusion HLO. -TEST_F(CudnnConvolutionRewriterTest, - BackwardInputConvolveUnevenPaddingOnActivations) { +// CudnnConvPaddingLegalization will canonicalize the fusion HLO. 
+TEST_F(CudnnConvRewriterTest, BackwardInputConvolveUnevenPaddingOnActivations) { auto builder = HloComputation::Builder(TestName()); // The gradients are in NCHW layout. HloInstruction* output = @@ -543,9 +539,10 @@ TEST_F(CudnnConvolutionRewriterTest, // BC = BackwardInput(FC) does: // [4] = conv([3], reverse([2]), padding_high=2) // -// We currently don't fuse BC because PadInsertion doesn't support negative -// padding on the gradients of backward convolution (b/32744257). -TEST_F(CudnnConvolutionRewriterTest, +// We currently don't fuse BC because CudnnConvPaddingLegalization +// doesn't support negative padding on the gradients of backward convolution +// (b/32744257). +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveNegativePaddingHighOnActivations) { auto builder = HloComputation::Builder(TestName()); // The gradients are in NCHW layout. @@ -586,7 +583,7 @@ TEST_F(CudnnConvolutionRewriterTest, // Check that we will materialize a reversed version of a constant in order to // pattern-match a backwards input convolution. -TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveConstantFilter) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveConstantFilter) { Array4D constant_arr(4, 4, 2, 2); constant_arr.FillIota(0); string constant_str = diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc similarity index 90% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc index a809c22b33..0b4fdf7162 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h" @@ -110,10 +110,10 @@ class ScratchBufAllocator : public se::ScratchAllocator { }; template -Status RunCudnnConvolutionImpl(CudnnConvParams params, - se::ScratchAllocator* scratch_allocator, - se::Stream* stream, - se::dnn::ProfileResult* profile_result) { +Status RunCudnnConvImpl(CudnnConvParams params, + se::ScratchAllocator* scratch_allocator, + se::Stream* stream, + se::dnn::ProfileResult* profile_result) { CudnnConvKind kind = params.kind; const Shape& input_shape = *params.input_shape; const Shape& filter_shape = *params.filter_shape; @@ -380,22 +380,21 @@ StatusOr GetCudnnConvParams( } // anonymous namespace -Status RunCudnnConvolution(const HloCustomCallInstruction* conv, - absl::Span operand_buffers, - se::DeviceMemoryBase result_buffer, - se::DeviceMemoryBase scratch_buf, se::Stream* stream, - se::dnn::ProfileResult* profile_result) { +Status RunCudnnConv(const HloCustomCallInstruction* conv, + absl::Span operand_buffers, + se::DeviceMemoryBase result_buffer, + se::DeviceMemoryBase scratch_buf, se::Stream* stream, + se::dnn::ProfileResult* profile_result) { ScratchBufAllocator scratch_allocator(scratch_buf); - return RunCudnnConvolution(conv, operand_buffers, result_buffer, - &scratch_allocator, stream, profile_result); + return RunCudnnConv(conv, operand_buffers, result_buffer, &scratch_allocator, + stream, profile_result); } -Status RunCudnnConvolution(const HloCustomCallInstruction* conv, - absl::Span operand_buffers, - se::DeviceMemoryBase result_buffer, - se::ScratchAllocator* scratch_allocator, - se::Stream* stream, - se::dnn::ProfileResult* profile_result) { +Status 
RunCudnnConv(const HloCustomCallInstruction* conv, + absl::Span operand_buffers, + se::DeviceMemoryBase result_buffer, + se::ScratchAllocator* scratch_allocator, se::Stream* stream, + se::dnn::ProfileResult* profile_result) { TF_ASSIGN_OR_RETURN(CudnnConvParams params, GetCudnnConvParams(conv, operand_buffers, result_buffer)); @@ -403,14 +402,14 @@ Status RunCudnnConvolution(const HloCustomCallInstruction* conv, conv->shape().tuple_shapes(0).element_type(); switch (output_primitive_type) { case F16: - return RunCudnnConvolutionImpl(params, scratch_allocator, - stream, profile_result); + return RunCudnnConvImpl(params, scratch_allocator, stream, + profile_result); case F32: - return RunCudnnConvolutionImpl(params, scratch_allocator, stream, - profile_result); + return RunCudnnConvImpl(params, scratch_allocator, stream, + profile_result); case F64: - return RunCudnnConvolutionImpl(params, scratch_allocator, stream, - profile_result); + return RunCudnnConvImpl(params, scratch_allocator, stream, + profile_result); default: LOG(FATAL) << ShapeUtil::HumanString(*params.output_shape); } diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h similarity index 67% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h index 61aec1cecc..edbc75a94a 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_ #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -42,20 +42,19 @@ namespace gpu { // allocator and take note of how much memory is used. The next time you call // the same conv, you can provide an explicitly preallocated scratch buffer of // that size, if you like. -Status RunCudnnConvolution(const HloCustomCallInstruction* conv, - absl::Span operand_buffers, - se::DeviceMemoryBase result_buffer, - se::DeviceMemoryBase scratch_buf, se::Stream* stream, - se::dnn::ProfileResult* profile_result = nullptr); - -Status RunCudnnConvolution(const HloCustomCallInstruction* conv, - absl::Span operand_buffers, - se::DeviceMemoryBase result_buffer, - se::ScratchAllocator* scratch_allocator, - se::Stream* stream, - se::dnn::ProfileResult* profile_result = nullptr); +Status RunCudnnConv(const HloCustomCallInstruction* conv, + absl::Span operand_buffers, + se::DeviceMemoryBase result_buffer, + se::DeviceMemoryBase scratch_buf, se::Stream* stream, + se::dnn::ProfileResult* profile_result = nullptr); + +Status RunCudnnConv(const HloCustomCallInstruction* conv, + absl::Span operand_buffers, + se::DeviceMemoryBase result_buffer, + se::ScratchAllocator* scratch_allocator, se::Stream* stream, + se::dnn::ProfileResult* profile_result = nullptr); } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc similarity 
index 98% rename from tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc index d508cbc2e1..8ac11bcf65 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h" @@ -242,7 +242,7 @@ StatusOr> TryRewriteToCudnnForwardRelu( } // namespace -StatusOr CudnnFusedConvolutionRewriter::Run(HloModule* module) { +StatusOr CudnnFusedConvRewriter::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { std::vector matches; diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h similarity index 77% rename from tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h rename to tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h index bd12aadded..613ed8dbdc 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_ #include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -22,7 +22,7 @@ limitations under the License. namespace xla { namespace gpu { -class CudnnFusedConvolutionRewriter : public HloModulePass { +class CudnnFusedConvRewriter : public HloModulePass { public: absl::string_view name() const override { return "cudnn-fused-convolution-rewriter"; @@ -34,4 +34,4 @@ class CudnnFusedConvolutionRewriter : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h index a64a616ab1..f373d4a839 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h @@ -108,9 +108,9 @@ bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo); // memory used by cudnn. Callers shouldn't inspect scratch_memory, as its value // is not well-defined. // -// CudnnConvolutionRewriter lowers kConvolution HLOs to these custom calls. +// CudnnConvRewriter lowers kConvolution HLOs to these custom calls. // When it does so, it chooses algorithm -1 and 0 bytes of scratch space. 
Later -// on in the pipeline, CudnnConvolutionAlgorithmChooser chooses an explicit +// on in the pipeline, CudnnConvAlgorithmChooser chooses an explicit // algorithm for each conv and sets the amount of scratch space needed. // // (Representing the scratch memory as an output may seem strange at first, but diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 09486d291a..851060da6e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -43,7 +43,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h" #include "tensorflow/compiler/xla/service/gpu/copy_thunk.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "tensorflow/compiler/xla/service/gpu/fft_thunk.h" #include "tensorflow/compiler/xla/service/gpu/for_thunk.h" #include "tensorflow/compiler/xla/service/gpu/gemm_thunk.h" diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index 5409f65589..b394784fde 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -38,9 +38,11 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" #include "tensorflow/compiler/xla/service/gpu/gpu_constants.h" #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h" @@ -54,8 +56,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h" #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.h" #include "tensorflow/compiler/xla/service/gpu/multi_output_fusion.h" -#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h" -#include "tensorflow/compiler/xla/service/gpu/pad_insertion.h" #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h" #include "tensorflow/compiler/xla/service/gpu/stream_assignment.h" #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" @@ -201,21 +201,22 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, { // Convert convolutions into CustomCalls to cudnn, then canonicalize them - // (PadInsertion). + // (CudnnConvPaddingLegalization). 
HloPassPipeline pipeline("conv_canonicalization"); pipeline.AddInvariantChecker(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false); - pipeline.AddPass(); - pipeline.AddPass(); - pipeline.AddPass(); + pipeline.AddPass(); + pipeline.AddPass(); + pipeline.AddPass(); if (IsVoltaOrLater(*stream_exec)) { - pipeline.AddPass(); - // PadForTensorCores leaves behind unnecessary tuple/get-tuple-element + pipeline.AddPass(); + // CudnnConvPadForSpeed leaves behind unnecessary tuple/get-tuple-element // pairs that TupleSimplifier fixes. pipeline.AddPass(); } - // CudnnConvolutionRewriter, PadInsertion and PadForTensorCores may add - // instructions which can be simplified by constant folding. + // CudnnConvRewriter, CudnnConvPaddingLegalization and + // CudnnConvPadForSpeed may add instructions which can be simplified by + // constant folding. pipeline.AddPass(); TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); } @@ -252,7 +253,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // Choose the fastest algorithm for each conv. // // We pick the algorithm before fusion so we can generate better HLO. After - // CudnnConvolutionRewriter, our convolutions are CustomCalls which return a + // CudnnConvRewriter, our convolutions are CustomCalls which return a // tuple (conv_result, scratch_memory), and the each conv uses 0 bytes of // scratch: // @@ -270,12 +271,12 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // The new tuple and gte instructions then be simplified away, because // nobody is expected to use the scratch value. // - // However, if we were to run CudnnConvolutionAlgorithmPicker after fusion + // However, if we were to run CudnnConvAlgorithmPicker after fusion // the gte(customcall, 0) would probably already be into a fusion node. We // can't simplify across HloComputation boundaries, so in this case we // wouldn't be able to simplify away the new_tuple bits. 
- pipeline.AddPass( - stream_exec, device_allocator, compiler); + pipeline.AddPass(stream_exec, device_allocator, + compiler); // Clean up new_tuple described above. pipeline.AddPass(); diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index 1f0436278c..d22ffc1754 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -211,8 +211,8 @@ tf_cc_test( ) tf_cc_test( - name = "cudnn_fused_convolution_rewriter_test", - srcs = ["cudnn_fused_convolution_rewriter_test.cc"], + name = "cudnn_fused_conv_rewriter_test", + srcs = ["cudnn_fused_conv_rewriter_test.cc"], tags = tf_cuda_tests_tags(), deps = [ ":gpu_codegen_test", diff --git a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc similarity index 93% rename from tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc rename to tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc index 5632cac186..8bdb4c8080 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc +++ b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc @@ -22,7 +22,7 @@ namespace xla { namespace gpu { namespace { -class CudnnFusedConvolutionRewriterTest : public HloTestBase { +class CudnnFusedConvRewriterTest : public HloTestBase { protected: string GetOptimizedHlo(absl::string_view hlo_string) { return backend() @@ -66,7 +66,7 @@ class CudnnFusedConvolutionRewriterTest : public HloTestBase { } }; -TEST_F(CudnnFusedConvolutionRewriterTest, TestConvOnly) { +TEST_F(CudnnFusedConvRewriterTest, TestConvOnly) { // max(0, conv(x, w)); TestMatchWithAllTypes(R"( HloModule Test @@ -83,7 +83,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestConvOnly) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestBias) { 
+TEST_F(CudnnFusedConvRewriterTest, TestBias) { // max(0, conv(x, w) + bias); TestMatchWithAllTypes(R"( HloModule Test @@ -103,7 +103,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestBias) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestSideInputOnly) { +TEST_F(CudnnFusedConvRewriterTest, TestSideInputOnly) { // max(0, conv(x, w) + side_input); TestMatchWithAllTypes(R"( HloModule Test @@ -122,7 +122,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestSideInputOnly) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestBiasAndSideInput) { +TEST_F(CudnnFusedConvRewriterTest, TestBiasAndSideInput) { // max(0, conv(x, w) + side_input + bias); TestMatchWithAllTypes(R"( HloModule Test @@ -144,7 +144,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestBiasAndSideInput) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConv) { +TEST_F(CudnnFusedConvRewriterTest, TestScaledConv) { // max(0, 0.999994934 * conv(x, w)); TestMatchWithAllTypes(R"( HloModule Test @@ -164,7 +164,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConv) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndSideInput) { +TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndSideInput) { // max(0, conv(x, w) + 0.899994934 * side_input); TestMatchWithAllTypes(R"( HloModule Test @@ -186,7 +186,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndSideInput) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndScaledSideInput) { +TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndScaledSideInput) { // max(0, 0.999994934 * conv(x, w) + 0.899994934 * side_input); TestMatchWithAllTypes(R"( HloModule Test @@ -211,8 +211,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndScaledSideInput) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, - TestScaledConvAndScaledSideInputWithBias) { +TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndScaledSideInputWithBias) { // max(0, 0.999994934 * conv(x, w) + 0.899994934 * side_input 
+ bias); TestMatchWithAllTypes(R"( HloModule Test @@ -240,7 +239,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchMaxZeroOnly) { +TEST_F(CudnnFusedConvRewriterTest, TestMatchMaxZeroOnly) { // max(0.1, conv(x, w)) shouldn't match. TestNotMatchWithAllTypes(R"( HloModule Test @@ -257,7 +256,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchMaxZeroOnly) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchBroadcastedBiasOnly) { +TEST_F(CudnnFusedConvRewriterTest, TestMatchBroadcastedBiasOnly) { // max(0, conv(x, w) + side_input1 + side_input2) shouldn't match. TestNotMatchWithAllTypes(R"( HloModule Test -- GitLab From 3abfe2cd9befa263de57edfae7d4c0d29c9c9182 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 17:07:19 -0700 Subject: [PATCH 247/411] Allow the XRTCompile op to return the ProgramShape resulting from the XLA compilation. PiperOrigin-RevId: 216619617 --- .../xla/service/compile_only_service.cc | 2 + .../compiler/xrt/kernels/xrt_compile_ops.cc | 19 +++- .../compiler/xrt/kernels/xrt_execute_op.cc | 8 -- .../compiler/xrt/ops/xrt_compile_ops.cc | 7 +- tensorflow/compiler/xrt/tests/BUILD | 13 ++- tensorflow/compiler/xrt/tests/raw_api_test.cc | 106 +++++++++++++++++- 6 files changed, 135 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index 96bd2616f5..bd5045b9b9 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -89,6 +89,8 @@ CompileOnlyService::CompileAheadOfTime( const auto& program_shape = instance.computation.program_shape(); ExecutionOptions execution_options; *execution_options.mutable_debug_options() = debug_options; + *execution_options.mutable_shape_with_output_layout() = + *instance.result_layout; TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, 
CreateModuleConfig(program_shape, instance.argument_layouts, diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc index 1d4f8d97f2..1ab836a496 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc @@ -166,10 +166,21 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) { VLOG(1) << "Compiling XLA executable"; return Compile(ctx, computation_proto, program); })); - - Tensor output(DT_INT64, TensorShape({})); - output.scalar()() = uid; - ctx->set_output(0, output); + std::unique_ptr entry; + OP_REQUIRES_OK(ctx, cache->Lookup(uid, &entry)); + + Tensor handle_output(DT_INT64, TensorShape({})); + handle_output.scalar()() = uid; + ctx->set_output(0, handle_output); + + xla::LocalExecutable* executable = entry->get().get_executable(); + xla::ProgramShape program_shape = executable->executable() + ->module() + .entry_computation() + ->ComputeProgramShape(); + Tensor program_shape_output(DT_STRING, TensorShape({1})); + program_shape_output.vec()(0) = program_shape.SerializeAsString(); + ctx->set_output(1, program_shape_output); } XRTCompileOp::~XRTCompileOp() = default; diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc index 257b054f16..3a1e03280a 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc @@ -64,14 +64,6 @@ uint32 GetXLARandomSeed() { return counter.fetch_add(2); } -// Looks up the input `key` in the compilation cache. -Status GetComputationCacheEntry( - XRTCompilationCache* cache, int64 key, - std::unique_ptr* entry) { - TF_RETURN_IF_ERROR(cache->Lookup(key, entry)); - return Status::OK(); -} - // Populates `inputs` with the input tensors to the computation. 
Status GetComputationInputs(OpKernelContext* context, ResourceMgr* rm, bool release_inputs, diff --git a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc index 5cfc8711f9..7b3b50c695 100644 --- a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc @@ -23,7 +23,12 @@ namespace tensorflow { REGISTER_OP("XRTCompile") .Input("computation: string") .Output("handle: int64") - .SetShapeFn(tensorflow::shape_inference::ScalarShape) + .Output("program_shape: string") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Scalar()); + c->set_output(1, c->UnknownShapeOfRank(1)); + return Status::OK(); + }) .Doc( R"( Reads a computation proto, compiles it, and places it in the global compilation diff --git a/tensorflow/compiler/xrt/tests/BUILD b/tensorflow/compiler/xrt/tests/BUILD index b6dcfc4eb9..be44a3474a 100644 --- a/tensorflow/compiler/xrt/tests/BUILD +++ b/tensorflow/compiler/xrt/tests/BUILD @@ -29,8 +29,11 @@ cc_library( "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_computation", + "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xrt:xrt_proto", "//tensorflow/compiler/xrt:xrt_server", "//tensorflow/compiler/xrt/cc:xrt_ops", @@ -49,7 +52,10 @@ tf_cc_test( name = "raw_api_test_cpu", size = "medium", srcs = [], - args = ["--xla_test_device=XLA_CPU"], + args = [ + "--xla_test_device=XLA_CPU", + "--xla_platform=CPU", + ], deps = [ ":raw_api_test_lib", "//tensorflow/compiler/jit:xla_cpu_device", @@ -60,7 +66,10 @@ tf_cuda_cc_test( name = "raw_api_test_gpu", size = "medium", srcs = [], - args = ["--xla_test_device=XLA_GPU"], + args = [ + "--xla_test_device=XLA_GPU", + 
"--xla_platform=GPU", + ], tags = tf_cuda_tests_tags(), deps = [ ":raw_api_test_lib", diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index 9fc01e6304..ee6734020d 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -22,10 +22,13 @@ limitations under the License. #include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h" @@ -43,6 +46,7 @@ namespace tensorflow { namespace { string* xla_test_device_ptr; // initial value set in main() +string* xla_platform_ptr; // initial value set in main() string DeviceFromFlag() { string xla_test_device = *xla_test_device_ptr; @@ -145,6 +149,28 @@ void StoreComputationSnapshot(const xla::XlaComputation& computation, *dst = *snapshot; } +xla::ProgramShape XlaCompiledProgramShape( + const xla::XlaComputation& computation, + const xla::ProgramShape& input_program_shape) { + se::Platform* platform = + xla::PlatformUtil::GetPlatform(*xla_platform_ptr).ValueOrDie(); + xla::LocalClient* client = + xla::ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie(); + xla::ExecutableBuildOptions exec_options; + exec_options.set_result_layout(input_program_shape.result()); + std::vector parameters_shapes; + for (int64 i = 0; i < input_program_shape.parameters_size(); ++i) { + 
parameters_shapes.push_back(&input_program_shape.parameters(i)); + } + auto local_executable = + client->Compile(computation, parameters_shapes, exec_options) + .ValueOrDie(); + return local_executable->executable() + ->module() + .entry_computation() + ->ComputeProgramShape(); +} + TEST(RawApiTest, ReadAndWriteState) { xrt::XLAAllocation alloc; alloc.set_device_ordinal(0); @@ -338,20 +364,87 @@ TEST(RawApiTest, CompileAndExecute) { auto p1_value = ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); auto p1_handle = ops::XRTAllocate(root, p1_value); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, {Output(p0_handle), Output(p1_handle)}); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({read_back}, &outputs)); + TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); xla::LiteralProto response; EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); auto expected = xla::LiteralUtil::CreateR1({27.0f, 21.0f}); EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); + + xla::ProgramShape program_shape; + EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec()(0))); + EXPECT_EQ(program_shape.parameters_size(), 2); +} + +TEST(RawApiTest, CompileWithXlaReturnShapes) { + xla::XlaBuilder builder("XrtXlaShapes"); + auto input_shape = xla::ShapeUtil::MakeShape(xla::BF16, {32, 3, 128, 128}); + auto kernel_shape = xla::ShapeUtil::MakeShape(xla::BF16, {3, 3, 5, 5}); + // Clear layouts to signal XLA we are ready to get whatever are coming out of + // the compilation process. 
+ xla::LayoutUtil::ClearLayout(&input_shape); + xla::LayoutUtil::ClearLayout(&kernel_shape); + auto param_shape = + xla::ShapeUtil::MakeTupleShape({input_shape, kernel_shape}); + auto param = xla::Parameter(&builder, 0, param_shape, "param"); + auto input = xla::GetTupleElement(param, 0); + auto kernel = xla::GetTupleElement(param, 1); + xla::Conv(input, kernel, {1, 1}, xla::Padding::kSame); + TF_ASSERT_OK_AND_ASSIGN(xla::XlaComputation xla_computation, builder.Build()); + + auto result_shape = xla_computation.GetProgramShape().ValueOrDie().result(); + // Clear the result shape layout to tell XLA we are accepting whatever are + // coming out of the compilation process. + xla::LayoutUtil::ClearLayout(&result_shape); + + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = param_shape; + *shapes->mutable_result() = result_shape; + StoreComputationSnapshot(xla_computation, c.mutable_hlo_snapshot()); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + auto computation = + ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto release = ops::XRTReleaseCompilationHandle(root, c_handle.handle); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run(tensorflow::ClientSession::FeedType(), + {c_handle.program_shape}, {release}, &outputs)); + + xla::ProgramShape program_shape; + EXPECT_TRUE(program_shape.ParseFromString(outputs[0].vec()(0))); + EXPECT_EQ(program_shape.parameters_size(), 1); + + VLOG(2) << "Param: " + << xla::ShapeUtil::HumanStringWithLayout(program_shape.parameters(0)); + VLOG(2) << "Result: " + << xla::ShapeUtil::HumanStringWithLayout(program_shape.result()); + + xla::ProgramShape xla_program_shape = + XlaCompiledProgramShape(xla_computation, *shapes); + EXPECT_TRUE(xla::LayoutUtil::Equal( + 
xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {0}).layout(), + xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {0}) + .layout())); + EXPECT_TRUE(xla::LayoutUtil::Equal( + xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {1}).layout(), + xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {1}) + .layout())); + EXPECT_TRUE(xla::LayoutUtil::Equal(program_shape.result().layout(), + xla_program_shape.result().layout())); } TEST(RawApiTest, CompileAndExecuteZeroArg) { @@ -371,7 +464,7 @@ TEST(RawApiTest, CompileAndExecuteZeroArg) { auto computation = ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); auto c_handle = ops::XRTCompile(root, computation); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, std::initializer_list({})); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); @@ -420,7 +513,7 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) { auto p1_value = ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); auto p1_handle = ops::XRTAllocate(root, p1_value); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, {Output(p0_handle), Output(p1_handle)}); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); @@ -455,7 +548,7 @@ TEST(RawApiTest, LeakCompilationReference) { ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({c_handle}, &outputs)); + TF_EXPECT_OK(session.Run({c_handle.handle}, &outputs)); } } // namespace @@ -464,9 +557,12 @@ TEST(RawApiTest, LeakCompilationReference) { int main(int argc, char** argv) { tensorflow::xla_test_device_ptr = new tensorflow::string("XLA_CPU"); + tensorflow::xla_platform_ptr = new tensorflow::string("CPU"); std::vector flag_list = { tensorflow::Flag("xla_test_device", tensorflow::xla_test_device_ptr, "Tensorflow 
device type to use for test, e.g., XLA_CPU"), + tensorflow::Flag("xla_platform", tensorflow::xla_platform_ptr, + "The XLA platform to select for the device"), }; tensorflow::string usage = tensorflow::Flags::Usage(argv[0], flag_list); const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); -- GitLab From 331683cb22246d116778c850fcbf1cc2cc74c9ce Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Wed, 10 Oct 2018 17:25:25 -0700 Subject: [PATCH 248/411] Include <cmath> in cost_estimator.h INFINITY define requires the cmath header. PiperOrigin-RevId: 216621867 --- tensorflow/core/grappler/costs/cost_estimator.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h index 811e923b87..e3b3a36b09 100644 --- a/tensorflow/core/grappler/costs/cost_estimator.h +++ b/tensorflow/core/grappler/costs/cost_estimator.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_GRAPPLER_COSTS_COST_ESTIMATOR_H_ #include +#include <cmath> #include #include "tensorflow/core/lib/core/status.h" -- GitLab From 2b010f2e48c4da512a87ab568600c8befe7147a9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 17:30:27 -0700 Subject: [PATCH 249/411] Adds a simple util to build a GrapplerItem from a MetaGraphDef stored in a file. 
PiperOrigin-RevId: 216622520 --- .../core/grappler/grappler_item_builder.cc | 9 ++++ .../core/grappler/grappler_item_builder.h | 6 +++ tensorflow/core/grappler/inputs/utils.cc | 16 ++++-- tensorflow/core/grappler/inputs/utils.h | 10 +++- tensorflow/core/grappler/inputs/utils_test.cc | 49 +++++++++++++++++++ 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 369046666d..24245a41c3 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -630,5 +630,14 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( return new_item; } +std::unique_ptr GrapplerItemFromMetaGraphDefFile( + const string& id, const string& meta_graph_file, const ItemConfig& cfg) { + MetaGraphDef meta_graph; + if (!ReadMetaGraphDefFromFile(meta_graph_file, &meta_graph).ok()) { + return nullptr; + } + return GrapplerItemFromMetaGraphDef(id, meta_graph, cfg); +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h index 1698587f8c..7102cf94c6 100644 --- a/tensorflow/core/grappler/grappler_item_builder.h +++ b/tensorflow/core/grappler/grappler_item_builder.h @@ -58,6 +58,12 @@ struct ItemConfig { std::unique_ptr GrapplerItemFromMetaGraphDef( const string& id, const MetaGraphDef& meta_graph, const ItemConfig& cfg); +// Factory method for creating a GrapplerItem from a file +// containing a MetaGraphDef in either binary or text format. +// Returns nullptr if the given meta_graph cannot be converted. 
+std::unique_ptr GrapplerItemFromMetaGraphDefFile( + const string& id, const string& meta_graph_file, const ItemConfig& cfg); + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/inputs/utils.cc b/tensorflow/core/grappler/inputs/utils.cc index def9198a69..03f59701ce 100644 --- a/tensorflow/core/grappler/inputs/utils.cc +++ b/tensorflow/core/grappler/inputs/utils.cc @@ -35,11 +35,19 @@ bool FileExists(const string& file, Status* status) { return status->ok(); } -Status ReadGraphDefFromFile(const string& graph_def_pbtxt_path, - GraphDef* result) { +Status ReadGraphDefFromFile(const string& graph_def_path, GraphDef* result) { Status status; - if (FileExists(graph_def_pbtxt_path, &status)) { - return ReadTextProto(Env::Default(), graph_def_pbtxt_path, result); + if (!ReadBinaryProto(Env::Default(), graph_def_path, result).ok()) { + return ReadTextProto(Env::Default(), graph_def_path, result); + } + return status; +} + +Status ReadMetaGraphDefFromFile(const string& graph_def_path, + MetaGraphDef* result) { + Status status; + if (!ReadBinaryProto(Env::Default(), graph_def_path, result).ok()) { + return ReadTextProto(Env::Default(), graph_def_path, result); } return status; } diff --git a/tensorflow/core/grappler/inputs/utils.h b/tensorflow/core/grappler/inputs/utils.h index 4b9cb0a9ad..2588e380fe 100644 --- a/tensorflow/core/grappler/inputs/utils.h +++ b/tensorflow/core/grappler/inputs/utils.h @@ -20,7 +20,9 @@ limitations under the License. 
#include #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/protobuf/meta_graph.pb.h" namespace tensorflow { namespace grappler { @@ -31,8 +33,12 @@ bool FilesExist(const std::set& files); bool FileExists(const string& file, Status* status); -Status ReadGraphDefFromFile(const string& graph_def_pbtxt_path, - GraphDef* result); +// Reads GraphDef from file in either text or raw serialized format. +Status ReadGraphDefFromFile(const string& graph_def_path, GraphDef* result); + +// Reads MetaGraphDef from file in either text or raw serialized format. +Status ReadMetaGraphDefFromFile(const string& meta_graph_def_path, + MetaGraphDef* result); } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/inputs/utils_test.cc b/tensorflow/core/grappler/inputs/utils_test.cc index 694a855280..c8af2aa738 100644 --- a/tensorflow/core/grappler/inputs/utils_test.cc +++ b/tensorflow/core/grappler/inputs/utils_test.cc @@ -31,6 +31,25 @@ class UtilsTest : public ::testing::Test { non_existent_file_ = io::JoinPath(BaseDir(), "non_existent_file.txt"); actual_file_ = io::JoinPath(BaseDir(), "test_file.txt"); TF_CHECK_OK(WriteStringToFile(env_, actual_file_, "Some test data")); + + text_graph_def_file_ = io::JoinPath(BaseDir(), "text_graph_def_file.txt"); + binary_graph_def_file_ = + io::JoinPath(BaseDir(), "binary_graph_def_file.txt"); + text_meta_graph_def_file_ = + io::JoinPath(BaseDir(), "text_meta_graph_def_file.txt"); + binary_meta_graph_def_file_ = + io::JoinPath(BaseDir(), "binary_meta_graph_def_file.txt"); + + auto node = graph_def_.add_node(); + node->set_name("foo"); + node->set_op("bar"); + TF_CHECK_OK(WriteTextProto(env_, text_graph_def_file_, graph_def_)); + TF_CHECK_OK(WriteBinaryProto(env_, binary_graph_def_file_, graph_def_)); + *meta_graph_def_.mutable_graph_def() = graph_def_; + TF_CHECK_OK( + WriteTextProto(env_, 
text_meta_graph_def_file_, meta_graph_def_)); + TF_CHECK_OK( + WriteBinaryProto(env_, binary_meta_graph_def_file_, meta_graph_def_)); } void TearDown() override { @@ -39,8 +58,14 @@ class UtilsTest : public ::testing::Test { env_->DeleteRecursively(BaseDir(), &undeleted_files, &undeleted_dirs)); } + GraphDef graph_def_; + MetaGraphDef meta_graph_def_; string non_existent_file_; string actual_file_; + string text_graph_def_file_; + string binary_graph_def_file_; + string text_meta_graph_def_file_; + string binary_meta_graph_def_file_; Env* env_ = Env::Default(); }; @@ -58,6 +83,30 @@ TEST_F(UtilsTest, FilesExist) { EXPECT_TRUE(status[1].ok()); } +TEST_F(UtilsTest, ReadGraphDefFromFile_Text) { + GraphDef result; + TF_CHECK_OK(ReadGraphDefFromFile(text_graph_def_file_, &result)); + EXPECT_EQ(result.DebugString(), graph_def_.DebugString()); +} + +TEST_F(UtilsTest, ReadGraphDefFromFile_Binary) { + GraphDef result; + TF_CHECK_OK(ReadGraphDefFromFile(binary_graph_def_file_, &result)); + EXPECT_EQ(result.DebugString(), graph_def_.DebugString()); +} + +TEST_F(UtilsTest, ReadMetaGraphDefFromFile_Text) { + MetaGraphDef result; + TF_CHECK_OK(ReadMetaGraphDefFromFile(text_meta_graph_def_file_, &result)); + EXPECT_EQ(result.DebugString(), meta_graph_def_.DebugString()); +} + +TEST_F(UtilsTest, ReadReadMetaGraphDefFromFile_Binary) { + MetaGraphDef result; + TF_CHECK_OK(ReadMetaGraphDefFromFile(binary_meta_graph_def_file_, &result)); + EXPECT_EQ(result.DebugString(), meta_graph_def_.DebugString()); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From e5537748753491f803fbddebdcb1cdb710631db9 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 10 Oct 2018 17:35:46 -0700 Subject: [PATCH 250/411] Batched per_image_standardization PiperOrigin-RevId: 216623201 --- tensorflow/python/ops/image_ops_impl.py | 14 +++++++++----- tensorflow/python/ops/image_ops_test.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 1c75aab578..a5c800ed9f 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1184,7 +1184,8 @@ def per_image_standardization(image): away from zero to protect against division by 0 when handling uniform images. Args: - image: 3-D tensor of shape `[height, width, channels]`. + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. Returns: The standardized image with same shape as `image`. @@ -1194,14 +1195,17 @@ def per_image_standardization(image): """ with ops.name_scope(None, 'per_image_standardization', [image]) as scope: image = ops.convert_to_tensor(image, name='image') - image = _Assert3DImage(image) - num_pixels = math_ops.reduce_prod(array_ops.shape(image)) + image = _AssertAtLeast3DImage(image) + if image.get_shape().ndims != 3 and image.get_shape().ndims != 4: + raise ValueError('`image` must have either 3 or 4 dimensions.') + num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-1:-4:-1]) image = math_ops.cast(image, dtype=dtypes.float32) - image_mean = math_ops.reduce_mean(image) + image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) variance = ( - math_ops.reduce_mean(math_ops.square(image)) - + math_ops.reduce_mean( + math_ops.square(image), axis=[-1, -2, -3], keepdims=True) - math_ops.square(image_mean)) variance = gen_nn_ops.relu(variance) stddev = math_ops.sqrt(variance) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index ff86df6346..9ed3b4ff5d 100644 --- 
a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -1491,6 +1491,16 @@ class PerImageWhiteningTest(test_util.TensorFlowTestCase): whiten_np = whiten.eval() self.assertFalse(np.any(np.isnan(whiten_np))) + def testBatchWhitening(self): + imgs_np = np.random.uniform(0., 255., [4, 24, 24, 3]) + whiten_np = [self._NumpyPerImageWhitening(img) for img in imgs_np] + with self.test_session(use_gpu=True): + imgs = constant_op.constant(imgs_np) + whiten = image_ops.per_image_standardization(imgs) + whiten_tf = whiten.eval() + for w_tf, w_np in zip(whiten_tf, whiten_np): + self.assertAllClose(w_tf, w_np, atol=1e-4) + class CropToBoundingBoxTest(test_util.TensorFlowTestCase): -- GitLab From ed68320998bb13d7802b8cd1a2d02cf80a089052 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 10 Oct 2018 17:41:22 -0700 Subject: [PATCH 251/411] [XLA:GPU] Rework CudnnConvPadForSpeed. - Make the channel-dims-equal-to-3 heuristic more specific. The relevant cudnn optimization only occurs with input-channels equal to 3 and output-channels equal to 32 or 64. - Simplify the logic that decided which shapes' dimensions to pad by using a clever (if I do say so myself) trick. Really all we need is a new layer of indirection which allows us to say "modify the input shape's feature dim" and have that be reflected in the lhs shape's feature dim. - Rename it to CudnnConvPadForTensorCores. I initially thought that the 3-input-channels optimization applied to Pascal, but experimentally this seems to be incorrect. 
PiperOrigin-RevId: 216623854 --- tensorflow/compiler/xla/service/gpu/BUILD | 14 +- ....cc => cudnn_conv_pad_for_tensor_cores.cc} | 248 ++++++++---------- ...ed.h => cudnn_conv_pad_for_tensor_cores.h} | 25 +- ...> cudnn_conv_pad_for_tensor_cores_test.cc} | 52 +++- .../xla/service/gpu/nvptx_compiler.cc | 12 +- 5 files changed, 176 insertions(+), 175 deletions(-) rename tensorflow/compiler/xla/service/gpu/{cudnn_conv_pad_for_speed.cc => cudnn_conv_pad_for_tensor_cores.cc} (55%) rename tensorflow/compiler/xla/service/gpu/{cudnn_conv_pad_for_speed.h => cudnn_conv_pad_for_tensor_cores.h} (58%) rename tensorflow/compiler/xla/service/gpu/{cudnn_conv_pad_for_speed_test.cc => cudnn_conv_pad_for_tensor_cores_test.cc} (75%) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index ea285994be..4eb5739fe2 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -600,9 +600,9 @@ cc_library( ) cc_library( - name = "cudnn_conv_pad_for_speed", - srcs = ["cudnn_conv_pad_for_speed.cc"], - hdrs = ["cudnn_conv_pad_for_speed.h"], + name = "cudnn_conv_pad_for_tensor_cores", + srcs = ["cudnn_conv_pad_for_tensor_cores.cc"], + hdrs = ["cudnn_conv_pad_for_tensor_cores.h"], deps = [ ":ir_emission_utils", "//tensorflow/compiler/xla:literal_util", @@ -614,10 +614,10 @@ cc_library( ) tf_cc_test( - name = "cudnn_conv_pad_for_speed_test", - srcs = ["cudnn_conv_pad_for_speed_test.cc"], + name = "cudnn_conv_pad_for_tensor_cores_test", + srcs = ["cudnn_conv_pad_for_tensor_cores_test.cc"], deps = [ - ":cudnn_conv_pad_for_speed", + ":cudnn_conv_pad_for_tensor_cores", ":ir_emission_utils", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", @@ -661,7 +661,7 @@ cc_library( hdrs = ["nvptx_compiler.h"], deps = [ ":cudnn_conv_algorithm_picker", - ":cudnn_conv_pad_for_speed", + ":cudnn_conv_pad_for_tensor_cores", ":cudnn_conv_padding_legalization", ":cudnn_conv_rewriter", 
":cudnn_fused_conv_rewriter", diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc similarity index 55% rename from tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc index 24b1f1af27..5aa4f839f4 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" @@ -24,50 +24,17 @@ limitations under the License. namespace xla { namespace gpu { -// We want the input/output feature counts of an f16 conv to be factors of 8, -// because without this cudnn can't use tensor cores on the conv. -static constexpr int64 kDesiredNumFeaturesFactor = 8; - // We won't pad a conv if doing so increases the total number of bytes in the // lhs, rhs, or result by more than this amount. // // TODO(jlebar): This number was tuned experimentally. It represents a // compromise on our current benchmarks; it speeds some up significantly, and // doesn't slow any down. But we can observe by changing this value that -// there's additional room for speedups. Achieving those speedups without also -// slowing other things down will likely require a more sophisticated heuristic, -// possibly some form of auto-tuning. -// -// This value should be >= 4/3, otherwise the "dims of size 3 padded up to 4" -// special case inside PadShape won't fire. 
+// there's additional room for speedups. Achieving those speedups without +// also slowing other things down will likely require a more sophisticated +// heuristic, possibly some form of auto-tuning. static constexpr double kMaxBytesTouchedIncrease = 1.35; -// Pads the given dimensions in the given shape up to a multiple of -// kDesiredNumFeaturesFactor. -static Shape PadShape(Shape s, absl::Span dims) { - for (int64 dim : dims) { - int64 dim_to_pad_size = s.dimensions(dim); - - // Round dim_to_pad_size up to the next multiple of - // kDesiredNumFeaturesFactor. - // - // Special case: dims of size 3 are rounded up to 4, not - // kDesiredNumFeaturesFactor. Empirically (and on the advice of nvidia), - // this helps, but as of writing, it's not supported by anything in the - // cudnn docs. - int64 new_dim_to_pad_size; - if (dim_to_pad_size == 3) { - new_dim_to_pad_size = 4; - } else { - new_dim_to_pad_size = - RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor); - } - - s.set_dimensions(dim, new_dim_to_pad_size); - } - return s; -} - // Creates and returns an HLO that zero-pads one or more dimensions in the given // instruction so that its shape is equal to the given shape. // @@ -103,100 +70,19 @@ static HloInstruction* PadInstruction(HloInstruction* instr, HloInstruction::CreatePad(new_shape, instr, zero, pad_config)); } -// Pads the input/output feature dimensions of the given cudnn convolution -// custom-call to be multiples of kDesiredNumFeaturesFactor. -static StatusOr PadFeaturesDims(HloCustomCallInstruction* conv) { +// Modifies the given convolution to have the given LHS/RHS/result shapes. +static Status PadConv(HloCustomCallInstruction* conv, + const Shape& new_lhs_shape, const Shape& new_rhs_shape, + const Shape& new_result_shape) { CHECK_EQ(0, conv->shape().tuple_shapes(1).dimensions(0)) << "conv must use 0 scratch bytes, i.e. 
this pass must be run " "before CudnnConvAlgorithmPicker."; - TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); - const auto& dnums = conv->convolution_dimension_numbers(); auto* lhs = conv->mutable_operand(0); auto* rhs = conv->mutable_operand(1); - const Shape& result_shape = conv->shape().tuple_shapes(0); - - Shape new_lhs_shape = [&] { - switch (kind) { - case CudnnConvKind::kForward: - case CudnnConvKind::kBackwardFilter: - // LHS is "input". - return PadShape(lhs->shape(), {dnums.input_feature_dimension()}); - case CudnnConvKind::kBackwardInput: - // LHS is "output". - return PadShape(lhs->shape(), {dnums.output_feature_dimension()}); - case CudnnConvKind::kForwardActivation: - LOG(FATAL) << "Not yet implemented."; - } - }(); - - Shape new_rhs_shape = [&] { - switch (kind) { - case CudnnConvKind::kForward: - case CudnnConvKind::kBackwardInput: - // RHS is "filter". - return PadShape(rhs->shape(), - {dnums.kernel_input_feature_dimension(), - dnums.kernel_output_feature_dimension()}); - case CudnnConvKind::kBackwardFilter: - // RHS is "output". - return PadShape(rhs->shape(), {dnums.output_feature_dimension()}); - case CudnnConvKind::kForwardActivation: - LOG(FATAL) << "Not yet implemented."; - } - }(); - - if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) && - ShapeUtil::Equal(rhs->shape(), new_rhs_shape)) { - VLOG(3) << "No need to pad features of " << conv->ToString(); - return false; - } - - Shape new_result_shape = [&] { - switch (kind) { - case CudnnConvKind::kForward: - // Result is "output". - return PadShape(result_shape, {dnums.output_feature_dimension()}); - case CudnnConvKind::kBackwardInput: - // Result is "input". - return PadShape(result_shape, {dnums.input_feature_dimension()}); - case CudnnConvKind::kBackwardFilter: - // Result is "filter". 
- return PadShape(result_shape, - {dnums.kernel_input_feature_dimension(), - dnums.kernel_output_feature_dimension()}); - case CudnnConvKind::kForwardActivation: - LOG(FATAL) << "Not yet implemented."; - } - }(); - - // Check that padding wouldn't increase the total bytes read/written by this - // operation too much. - auto check_size_increase = [&](const Shape& old_shape, - const Shape& new_shape) { - int64 old_bytes = ShapeUtil::ByteSizeOf(old_shape); - int64 new_bytes = ShapeUtil::ByteSizeOf(new_shape); - if (new_bytes <= old_bytes * kMaxBytesTouchedIncrease) { - return true; - } - VLOG(3) << "Not padding convolution; doing so would change input / result " - "shape from " - << ShapeUtil::HumanString(old_shape) << " to " - << ShapeUtil::HumanString(new_shape) << ", a size increase of " - << new_bytes / static_cast(old_bytes) << "x > " - << kMaxBytesTouchedIncrease << "x: " << conv->ToString(); - return false; - }; - if (!check_size_increase(lhs->shape(), new_lhs_shape) || - !check_size_increase(rhs->shape(), new_rhs_shape) || - !check_size_increase(result_shape, new_result_shape)) { - return false; - } - - // OK, let's do the transformation! 
- auto* new_lhs = PadInstruction(lhs, new_lhs_shape); auto* new_rhs = PadInstruction(rhs, new_rhs_shape); + const Shape& result_shape = conv->shape().tuple_shapes(0); CHECK(new_lhs != lhs || new_rhs != rhs) << "We should have had to pad either LHS or RHS."; @@ -229,7 +115,105 @@ static StatusOr PadFeaturesDims(HloCustomCallInstruction* conv) { VLOG(2) << "Padded features of " << conv->ToString() << ", replaced with " << new_conv->ToString(); - TF_RETURN_IF_ERROR(conv->parent()->ReplaceInstruction(conv, new_conv)); + return conv->parent()->ReplaceInstruction(conv, new_conv); +} + +static StatusOr PadForTensorCores(HloCustomCallInstruction* conv) { + TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); + const auto& dnums = conv->convolution_dimension_numbers(); + auto* lhs = conv->mutable_operand(0); + auto* rhs = conv->mutable_operand(1); + const Shape& result_shape = conv->shape().tuple_shapes(0); + + // Nothing to do on non-f16 convolutions. + if (result_shape.element_type() != PrimitiveType::F16) { + return false; + } + + // TODO(timshen): Don't skip forward-activation convs if we find a benchmark + // where there's a speedup. + if (kind == CudnnConvKind::kForwardActivation) { + return false; + } + + Shape new_lhs_shape = lhs->shape(); + Shape new_rhs_shape = rhs->shape(); + Shape new_result_shape = conv->shape().tuple_shapes(0); + + // new_{input,filter_output}_shape points to the appropriate one of + // new_{lhs,rhs,result}_shape. 
+ Shape* new_input_shape; + Shape* new_filter_shape; + Shape* new_output_shape; + std::tie(new_input_shape, new_filter_shape, new_output_shape) = [&] { + switch (kind) { + case CudnnConvKind::kForward: + case CudnnConvKind::kForwardActivation: + return std::make_tuple(&new_lhs_shape, &new_rhs_shape, + &new_result_shape); + case CudnnConvKind::kBackwardInput: + return std::make_tuple(&new_result_shape, &new_rhs_shape, + &new_lhs_shape); + case CudnnConvKind::kBackwardFilter: + return std::make_tuple(&new_lhs_shape, &new_result_shape, + &new_rhs_shape); + } + }(); + + // If there are 3 input features and 32 or 64 output features, pad the input + // features to 4. Otherwise, try padding to multiples of 8 and check that + // this doesn't make any of the conv buffers too much larger. + auto input_features = + new_input_shape->dimensions(dnums.input_feature_dimension()); + auto output_features = + new_output_shape->dimensions(dnums.output_feature_dimension()); + if (input_features == 3 && (output_features == 32 || output_features == 64)) { + new_input_shape->set_dimensions(dnums.input_feature_dimension(), 4); + new_filter_shape->set_dimensions(dnums.kernel_input_feature_dimension(), 4); + } else { + auto pad_dim = [](Shape* s, int64 dim) { + s->set_dimensions(dim, RoundUpToNearest(s->dimensions(dim), 8)); + }; + pad_dim(new_input_shape, dnums.input_feature_dimension()); + pad_dim(new_filter_shape, dnums.kernel_input_feature_dimension()); + pad_dim(new_filter_shape, dnums.kernel_output_feature_dimension()); + pad_dim(new_output_shape, dnums.output_feature_dimension()); + + // Check that padding wouldn't increase the total bytes read/written by this + // operation too much. 
+ auto check_size_increase = [&](const Shape& old_shape, + const Shape& new_shape) { + int64 old_bytes = ShapeUtil::ByteSizeOf(old_shape); + int64 new_bytes = ShapeUtil::ByteSizeOf(new_shape); + if (new_bytes <= old_bytes * kMaxBytesTouchedIncrease) { + return true; + } + VLOG(3) + << "Not padding convolution; doing so would change input / result " + "shape from " + << ShapeUtil::HumanString(old_shape) << " to " + << ShapeUtil::HumanString(new_shape) << ", a size increase of " + << new_bytes / static_cast(old_bytes) << "x > " + << kMaxBytesTouchedIncrease << "x: " << conv->ToString(); + return false; + }; + + if (!check_size_increase(lhs->shape(), new_lhs_shape) || + !check_size_increase(rhs->shape(), new_rhs_shape) || + !check_size_increase(result_shape, new_result_shape)) { + return false; + } + } + + if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) && + ShapeUtil::Equal(rhs->shape(), new_rhs_shape)) { + VLOG(3) << "No need to pad features of " << conv->ToString(); + return false; + } + + // OK, let's do the transformation! + TF_RETURN_IF_ERROR( + PadConv(conv, new_lhs_shape, new_rhs_shape, new_result_shape)); return true; } @@ -237,26 +221,18 @@ static std::vector GetRelevantConvs( HloComputation* comp) { std::vector convs; for (HloInstruction* instr : comp->instructions()) { - if (!IsCustomCallToDnnConvolution(*instr)) { - continue; - } - auto* custom_call = Cast(instr); - if (custom_call->operand(0)->shape().element_type() == F16 && - // TODO(timshen): Disable for fused conv for now. Implement it if it's - // needed. 
- custom_call->custom_call_target() != - kCudnnConvBiasActivationForwardCallTarget) { - convs.push_back(custom_call); + if (IsCustomCallToDnnConvolution(*instr)) { + convs.push_back(Cast(instr)); } } return convs; } -StatusOr CudnnConvPadForSpeed::Run(HloModule* module) { +StatusOr CudnnConvPadForTensorCores::Run(HloModule* module) { bool changed = false; for (HloComputation* comp : module->MakeNonfusionComputations()) { for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) { - TF_ASSIGN_OR_RETURN(bool result, PadFeaturesDims(conv)); + TF_ASSIGN_OR_RETURN(bool result, PadForTensorCores(conv)); changed |= result; } } diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h similarity index 58% rename from tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h index 89a894e9d3..d4e51e86c1 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h @@ -13,27 +13,28 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_ #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" namespace xla { namespace gpu { -// Ensures that f16 cudnn convolutions have input/output channel dimensions that -// are multiples of 8, inserting pads/slices as necessary. +// Adds padding to cudnn convolutions to make them run faster on GPUs with +// tensor cores. 
// -// This is useful primarily for Volta and newer GPUs, where tensor cores can -// only be used if the channel dims are multiples of 8. It's probably the -// opposite of useful on other GPUs, so you should check what GPU you're -// targeting before running this pass. +// - f16 convolutions are padded to have input/output channel dimensions that +// are multiples of 8, so that we can use tensor cores. // -// TODO(jlebar): Rework this. For one thing, it should not be Volta-only. -// Padding input channels 3 to 4 is (we think) applicable to Pascal as well. +// - f16 convolutions with 3 input channels and 32 or 64 output channels are +// padded to 4 input channels. There's a special-cased cudnn algorithm just +// for this. +// +// Don't run this pass on GPUs without tensor cores -- it will make them slower! // // TODO(jlebar): Also pad dots. -class CudnnConvPadForSpeed : public HloModulePass { +class CudnnConvPadForTensorCores : public HloModulePass { public: absl::string_view name() const override { return "cudnn-conv-pad-for-speed"; } @@ -43,4 +44,4 @@ class CudnnConvPadForSpeed : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_ diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc similarity index 75% rename from tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc index ec403021e6..074f95698e 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" @@ -29,9 +29,9 @@ namespace { namespace op = xla::testing::opcode_matchers; using ::testing::_; -class CudnnConvPadForSpeedTest : public HloVerifiedTestBase {}; +class CudnnConvPadForTensorCoresTest : public HloVerifiedTestBase {}; -TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvInputChannels) { +TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -42,7 +42,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvInputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" })"); - EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); SCOPED_TRACE(module().ToString()); @@ -55,7 +55,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvInputChannels) { ShapeUtil::MakeShape(F16, {2, 2, 48, 40}))); } -TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvOutputChannels) { +TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -66,7 +66,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvOutputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardInput" })"); - EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::CustomCall(kCudnnConvBackwardInputCallTarget, 
op::Pad(op::Parameter(0), _), @@ -77,7 +77,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvOutputChannels) { ShapeUtil::MakeShape(F16, {2, 2, 40, 48}))); } -TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvOutputChannels) { +TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -88,7 +88,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvOutputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" })"); - EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::Tuple(op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvForwardCallTarget, op::Parameter(0), @@ -96,7 +96,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvOutputChannels) { _)); } -TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvInputChannels) { +TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -108,7 +108,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvInputChannels) { custom_call_target="__cudnn$convBackwardInput" ROOT gte = f16[10,20,30,41] get-tuple-element(result), index=0 })"); - EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( @@ -117,7 +117,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvInputChannels) { _))); } -TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvInputChannels) { +TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -129,7 +129,7 @@ 
TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvInputChannels) { custom_call_target="__cudnn$convBackwardFilter" ROOT gte = f16[2,2,41,40] get-tuple-element(result), index=0 })"); - EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( @@ -138,7 +138,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvInputChannels) { _))); } -TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvOutputChannels) { +TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { ParseAndVerifyModule(R"( HloModule TestModule @@ -150,7 +150,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvOutputChannels) { custom_call_target="__cudnn$convBackwardFilter" ROOT gte = f16[2,2,40,41] get-tuple-element(result), index=0 })"); - EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie()); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); auto* root = module().entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( @@ -159,6 +159,30 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvOutputChannels) { _))); } +TEST_F(CudnnConvPadForTensorCoresTest, PadInputFeatures3To4) { + ParseAndVerifyModule(R"( + HloModule TestModule + + ENTRY TestComputation { + input = f16[10,20,30,3] parameter(0) + filter = f16[2,2,3,32] parameter(1) + ROOT result = (f16[10,20,30,32], u8[0]) custom-call(input, filter), + window={size=2x2}, dim_labels=b01f_01io->b01f, + custom_call_target="__cudnn$convForward" + })"); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); + auto* root = module().entry_computation()->root_instruction(); + + SCOPED_TRACE(module().ToString()); + EXPECT_THAT(root, 
op::CustomCall(kCudnnConvForwardCallTarget, + op::Pad(op::Parameter(0), _), + op::Pad(op::Parameter(1), _))); + EXPECT_TRUE(ShapeUtil::Equal(root->operand(0)->shape(), + ShapeUtil::MakeShape(F16, {10, 20, 30, 4}))); + EXPECT_TRUE(ShapeUtil::Equal(root->operand(1)->shape(), + ShapeUtil::MakeShape(F16, {2, 2, 4, 32}))); +} + } // anonymous namespace } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index b394784fde..829d1499bc 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -39,7 +39,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h" @@ -209,14 +209,14 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, pipeline.AddPass(); pipeline.AddPass(); if (IsVoltaOrLater(*stream_exec)) { - pipeline.AddPass(); - // CudnnConvPadForSpeed leaves behind unnecessary tuple/get-tuple-element - // pairs that TupleSimplifier fixes. + pipeline.AddPass(); + // CudnnConvPadForTensorCores leaves behind unnecessary + // tuple/get-tuple-element pairs that TupleSimplifier fixes. pipeline.AddPass(); } // CudnnConvRewriter, CudnnConvPaddingLegalization and - // CudnnConvPadForSpeed may add instructions which can be simplified by - // constant folding. 
+ // CudnnConvPadForTensorCores may add instructions which can be simplified + // by constant folding. pipeline.AddPass(); TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); } -- GitLab From 787f16c1bff954b1385e92cba00a54df1951b6f9 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Wed, 10 Oct 2018 17:44:26 -0700 Subject: [PATCH 252/411] Getting rid of MOVING_AVERAGE_VARIABLES collection usage in quantize_ops. PiperOrigin-RevId: 216624182 --- tensorflow/contrib/quantize/python/quant_ops.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index d9dc7fa62e..c7c099e1c6 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -49,7 +49,7 @@ def _ModelVariable(name, collections=None, trainable=None): collections = list(collections or []) - collections += [ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES] + collections += [ops.GraphKeys.GLOBAL_VARIABLES] return variable_scope.get_variable( name, shape=shape, @@ -62,7 +62,7 @@ def LastValueQuantize(inputs, per_channel=False, init_min=-6.0, init_max=6.0, - vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES, + vars_collection=None, name_prefix='LastValueQuant', reuse=None, is_training=True, @@ -104,17 +104,18 @@ def LastValueQuantize(inputs, else: min_max_shape = [] + vars_collections = [vars_collection] if vars_collection else [] min_var = _ModelVariable( 'min', shape=min_max_shape, initializer=init_ops.constant_initializer(init_min), - collections=[vars_collection], + collections=vars_collections, trainable=False) max_var = _ModelVariable( 'max', shape=min_max_shape, initializer=init_ops.constant_initializer(init_max), - collections=[vars_collection], + collections=vars_collections, trainable=False) if not is_training: return _FakeQuantWithMinMaxVars( @@ -212,17 +213,18 @@ def MovingAvgQuantize(inputs, else: 
min_max_shape = [] + vars_collections = [vars_collection] if vars_collection else [] min_var = _ModelVariable( 'min', shape=min_max_shape, initializer=init_ops.constant_initializer(init_min), - collections=[vars_collection], + collections=vars_collections, trainable=False) max_var = _ModelVariable( 'max', shape=min_max_shape, initializer=init_ops.constant_initializer(init_max), - collections=[vars_collection], + collections=vars_collections, trainable=False) if not is_training: return _FakeQuantWithMinMaxVars( -- GitLab From d45c30fa4cf35a0cc71c8b18c2b710d1a3559aee Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 10 Oct 2018 17:44:49 -0700 Subject: [PATCH 253/411] [XLA:GPU] Allow input fusion into scatter We fuse everything into the scatter now, and emit two kernels. The first kernel fills the output buffer with the computation fused into the scatter operand. The second kernel is a regular scatter, which also contains the fused operations from the updates and scatter_indices inputs. 
PiperOrigin-RevId: 216624225 --- .../xla/service/gpu/instruction_fusion.cc | 9 +- .../service/gpu/instruction_fusion_test.cc | 39 +++++ .../xla/service/gpu/ir_emitter_unnested.cc | 135 ++++++++++++++---- .../xla/service/gpu/ir_emitter_unnested.h | 8 ++ .../compiler/xla/service/hlo_matchers.h | 1 + tensorflow/compiler/xla/tests/scatter_test.cc | 31 ++++ 6 files changed, 195 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index b61f038739..1d66787d89 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -47,6 +47,7 @@ bool IsFusible(const HloInstruction& hlo) { hlo.opcode() == HloOpcode::kReduce || hlo.opcode() == HloOpcode::kReduceWindow || hlo.opcode() == HloOpcode::kReshape || + hlo.opcode() == HloOpcode::kScatter || hlo.opcode() == HloOpcode::kSlice || hlo.opcode() == HloOpcode::kTranspose; } @@ -223,6 +224,11 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer, return false; } + // Scatter is only supported at the root of a kInput fusion. + if (producer->opcode() == HloOpcode::kScatter) { + return false; + } + // Do not fuse into reduce input fusions if the resulting kernel would suffer // from poor data locality (due to unfriendly input layouts). 
if (IsInputFusibleReduction(*consumer) && @@ -285,7 +291,8 @@ bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer, HloInstruction::FusionKind GpuInstructionFusion::ChooseKind( const HloInstruction* producer, const HloInstruction* consumer) { - if (IsReductionToVector(*consumer)) { + if (IsReductionToVector(*consumer) || + consumer->opcode() == HloOpcode::kScatter) { return HloInstruction::FusionKind::kInput; } if (producer->opcode() == HloOpcode::kDot || diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index 96bfe0c12e..fd9b7cee80 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -709,5 +709,44 @@ TEST_F(InstructionFusionTest, AvoidsLargeFusion) { } } +TEST_F(InstructionFusionTest, FuseIntoScatter) { + auto module = ParseHloString(R"( + HloModule test_module + + add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + + ENTRY FuseIntoScatter { + p0 = s32[3,3] parameter(0) + operand = s32[3,3] add(p0, p0) + p1 = s32[2] parameter(1) + indices = s32[2] add(p1, p1) + p2 = s32[2,3] parameter(2) + updates = s32[2,3] add(p2, p2) + scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + ROOT add = s32[3,3] add(scatter, scatter) + })") + .ValueOrDie(); + + EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Add(op::Fusion(), op::Fusion())); + EXPECT_EQ(root->operand(0)->fusion_kind(), + HloInstruction::FusionKind::kInput); + EXPECT_THAT(root->operand(0)->fused_expression_root(), + op::Scatter(op::Add(), op::Add(), op::Add())); +} + } // namespace gpu } 
// namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 851060da6e..d8ae5b46fe 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -493,13 +493,68 @@ Status IrEmitterUnnested::HandleFft(HloInstruction* fft) { Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { HloInstruction* root = fusion->fused_expression_root(); - // HandleFusion specializes reduction from a multi-dimensional array to a 1D - // array. The specialized version requires a initializer thunk that - // initializes the output array to the initial value of the reduce. if (HloInstruction::FusionKind::kInput == fusion->fusion_kind()) { switch (root->opcode()) { + case HloOpcode::kScatter: { + std::vector> thunks; + // The initialization from 'operand' is using different loop bounds, so + // emit it in a separate kernel. Treat it like a loop fusion, writing to + // the output buffer. + { + int unroll_factor = ComputeMaxUnrollFactor(fusion); + thunks.push_back(BuildKernelThunk( + fusion, /*implements_whole_instruction=*/false, unroll_factor)); + + std::vector operand_parameter_arrays; + for (HloInstruction* operand : fusion->operands()) { + operand_parameter_arrays.push_back(GetIrArray(*operand, *fusion)); + } + GpuElementalIrEmitter operand_elemental_emitter( + hlo_module_config_, ir_emitter_context_->llvm_module(), &b_, + GetNestedComputer()); + FusedIrEmitter operand_fused_emitter(operand_parameter_arrays, + &operand_elemental_emitter); + TF_RETURN_IF_ERROR( + root->mutable_operand(0)->Accept(&operand_fused_emitter)); + + TF_RETURN_IF_ERROR(EmitTargetElementLoopInThunk( + *fusion, operand_fused_emitter.GetGenerator(root->operand(0)), + static_cast(thunks.back().get()))); + } + + // Now build the actual scatter, reading and writing to the freshly + // filled output buffer. 
+ { + thunks.push_back( + BuildKernelThunk(fusion, + /*implements_whole_instruction=*/false)); + // Spin up a new fused emitter for the scatter kernel and emit it. + std::vector scatter_parameter_arrays; + for (HloInstruction* operand : fusion->operands()) { + scatter_parameter_arrays.push_back(GetIrArray(*operand, *fusion)); + } + GpuElementalIrEmitter scatter_elemental_emitter( + hlo_module_config_, ir_emitter_context_->llvm_module(), &b_, + GetNestedComputer()); + FusedIrEmitter scatter_fused_emitter(scatter_parameter_arrays, + &scatter_elemental_emitter); + TF_RETURN_IF_ERROR(root->Accept(&scatter_fused_emitter)); + TF_RETURN_IF_ERROR(EmitScatter( + thunks.back().get(), root, + /*scatter_indices_gen=*/ + scatter_fused_emitter.GetGenerator(root->operand(1)), + /*updates_gen=*/ + scatter_fused_emitter.GetGenerator(root->operand(2)))); + } + thunk_sequence_->emplace_back( + absl::make_unique(std::move(thunks), fusion)); + return Status::OK(); + } case HloOpcode::kTuple: case HloOpcode::kReduce: { + // HandleFusion specializes reduction from a multi-dimensional array to + // a 1D array. The specialized version requires a initializer thunk that + // initializes the output array to the initial value of the reduce. if (root->opcode() == HloOpcode::kReduce && ShapeUtil::IsTuple(root->shape())) { // TODO(b/112040122): Support variadic reduce. 
@@ -1962,9 +2017,6 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { const HloInstruction* operand = scatter->operand(0); const HloInstruction* scatter_indices = scatter->operand(1); const HloInstruction* updates = scatter->operand(2); - const ScatterDimensionNumbers& dim_numbers = - scatter->scatter_dimension_numbers(); - CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape())); std::vector> thunks; @@ -1978,6 +2030,44 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { /*mem_size=*/ShapeUtil::ByteSizeOf(operand->shape()), scatter)); } + thunks.push_back( + BuildKernelThunk(scatter, + /*implements_whole_instruction=*/thunks.empty())); + + TF_RETURN_IF_ERROR( + EmitScatter(thunks.back().get(), scatter, + /*scatter_indices_gen=*/ + [=](const IrArray::Index& index) { + return GetIrArray(*scatter_indices, *scatter) + .EmitReadArrayElement(index, &b_, "scatter_index"); + }, + /*updates_gen=*/ + [=](const IrArray::Index& index) { + return GetIrArray(*updates, *scatter) + .EmitReadArrayElement(index, &b_, "update"); + })); + + // Elide the sequential thunk if there's no copy. 
+ if (thunks.size() == 1) { + thunk_sequence_->push_back(std::move(thunks[0])); + } else { + thunk_sequence_->emplace_back( + absl::make_unique(std::move(thunks), scatter)); + } + return Status::OK(); +} + +Status IrEmitterUnnested::EmitScatter( + Thunk* thunk, HloInstruction* scatter, + const llvm_ir::ElementGenerator& scatter_indices_gen, + const llvm_ir::ElementGenerator& updates_gen) { + const HloInstruction* operand = scatter->operand(0); + const HloInstruction* scatter_indices = scatter->operand(1); + const HloInstruction* updates = scatter->operand(2); + const ScatterDimensionNumbers& dim_numbers = + scatter->scatter_dimension_numbers(); + CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape())); + auto loop_body_emitter = [&](const IrArray::Index& index) -> Status { std::vector raw_window_multidim; std::vector input_scatter_multidim; @@ -2023,9 +2113,6 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { scatter_indices_shape.mutable_layout()->add_minor_to_major( dim_numbers.index_vector_dim()); } - llvm_ir::IrArray scatter_indices_reshaped = - GetIrArray(*scatter_indices, *scatter) - .CastToShape(scatter_indices_shape, &b_); // Now load the indices corresponding to the current window from // scatter_indices. @@ -2041,9 +2128,10 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { raw_scatter_index_index.GetConstantWithIndexType(i); int64 operand_dim = dim_numbers.scatter_dims_to_operand_dims(i); - llvm::Value* loaded_scatter_index = - scatter_indices_reshaped.EmitReadArrayElement(raw_scatter_index_index, - &b_, "scatter_index"); + TF_ASSIGN_OR_RETURN( + llvm::Value* const loaded_scatter_index, + scatter_indices_gen(raw_scatter_index_index.SourceIndexOfReshape( + scatter_indices_shape, scatter_indices->shape(), &b_))); // And add the index to our window index. This yields the output index. 
llvm::Value* dim_offset = Add(input_window_multidim[operand_dim], @@ -2068,11 +2156,15 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { // an atomic store to the calculated location in the output. llvm_ir::IrArray::Index input_window_index(input_window_multidim, index.GetType()); - llvm::Value* input_address = - GetIrArray(*updates, *scatter).EmitArrayElementAddress(index, &b_); + HloInstruction* output_hlo = + scatter->IsFused() ? scatter->parent()->FusionInstruction() : scatter; llvm::Value* output_address = - GetIrArray(*scatter, *scatter) + GetIrArray(*output_hlo, *output_hlo) .EmitArrayElementAddress(input_window_index, &b_); + llvm::Value* input_address = Alloca(llvm_ir::PrimitiveTypeToIrType( + updates->shape().element_type(), module_)); + TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, updates_gen(index)); + Store(input_ir_value, input_address); return EmitAtomicOperationForNestedComputation( *scatter->to_apply(), output_address, input_address); }; @@ -2080,22 +2172,11 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { // Launch a kernel that reads every element in the updates tensor. We could // also do one kernel per window instead if bounds checks turn out to be a // bottleneck. 
- thunks.push_back( - BuildKernelThunk(scatter, - /*implements_whole_instruction=*/thunks.empty())); - LaunchDimensions launch_dimensions = CalculateLaunchDimensions( updates->shape(), ir_emitter_context_->device_description()); - UpdateLaunchDimensions(launch_dimensions, - static_cast(thunks.back().get()), + UpdateLaunchDimensions(launch_dimensions, thunk, ir_emitter_context_->llvm_module()); - if (thunks.size() == 1) { - thunk_sequence_->push_back(std::move(thunks[0])); - } else { - thunk_sequence_->emplace_back( - absl::make_unique(std::move(thunks), scatter)); - } return ParallelLoopEmitter(loop_body_emitter, updates->shape(), launch_dimensions, &b_) .EmitLoop(IrName(scatter), diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 2e36e7235b..93f11c069a 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -185,6 +185,14 @@ class IrEmitterUnnested : public IrEmitter { absl::Span> extra_output_gens); + // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in + // the process. `scatter` may be fused, scatter indices are taken from + // `scatter_indices_gen`, updates from`updates_gen`. The output buffer is + // expected to have the operand values in it already. + Status EmitScatter(Thunk* thunk, HloInstruction* scatter, + const llvm_ir::ElementGenerator& scatter_indices_gen, + const llvm_ir::ElementGenerator& updates_gen); + // Returns true if a 0-2-1 tiling algorithm is already used to emit the kernel // for the hlo instruction. 
bool CheckAndEmitHloWithTile021(HloInstruction* hlo); diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index b05a012b4a..1717770301 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -217,6 +217,7 @@ HLO_MATCHER(Remainder); HLO_MATCHER(Reshape); HLO_MATCHER(Reverse); HLO_MATCHER(Rng); +HLO_MATCHER(Scatter); HLO_MATCHER(Select); HLO_MATCHER(SelectAndScatter); HLO_MATCHER(Send); diff --git a/tensorflow/compiler/xla/tests/scatter_test.cc b/tensorflow/compiler/xla/tests/scatter_test.cc index b21dd56045..d0cb93befa 100644 --- a/tensorflow/compiler/xla/tests/scatter_test.cc +++ b/tensorflow/compiler/xla/tests/scatter_test.cc @@ -69,6 +69,37 @@ ENTRY main { RunTest(hlo_text, &operand, &scatter_indices, &updates); } +XLA_TEST_F(ScatterTest, TensorFlowScatterV1_WithFusedAdds) { + const string hlo_text = R"( +HloModule TensorFlowScatterV1 + +update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) +} + +ENTRY main { + p0 = s32[3,3] parameter(0) + operand = s32[3,3] add(p0, p0) + p1 = s32[2] parameter(1) + indices = s32[2] add(p1, p1) + p2 = s32[2,3] parameter(2) + updates = s32[2,3] add(p2, p2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 +} +)"; + Literal operand = + LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + Literal scatter_indices = LiteralUtil::CreateR1({0, 1}); + Literal updates = LiteralUtil::CreateR2({{10, 20, 30}, {70, 80, 90}}); + RunTest(hlo_text, &operand, &scatter_indices, &updates); +} + XLA_TEST_F(ScatterTest, TensorFlowScatterV2_Update) { const char* hlo_text = R"( HloModule TensorFlowScatterV2 -- GitLab From f043aab73ceaa51ea12867c87ea6f856c3c02089 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 10 Oct 2018 18:08:11 -0700 Subject: [PATCH 254/411] Fixes in CompareConstantArrays: - Array fields minmax and quantization_params are pointers. (so the current check could have false negatives as identical objects have different addresses) - also compare narrow_range. (so the current check could have false positives --- my bad, I added narrow_range later and forgot to update this code). PiperOrigin-RevId: 216626868 --- .../contrib/lite/kernels/internal/types.h | 5 +++ tensorflow/contrib/lite/toco/tooling_util.cc | 38 ++++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index c6bc6074d4..a5913143b9 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -107,6 +107,11 @@ struct QuantizationParams { double scale = 0.0; }; +inline bool operator==(const QuantizationParams& qp1, + const QuantizationParams& qp2) { + return qp1.zero_point == qp2.zero_point && qp1.scale == qp2.scale; +} + template struct Dims { int sizes[N]; diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 61aa311212..a770ff8544 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -738,15 +738,41 @@ bool CompareArrayBuffers(const Array& lhs_array, const Array& rhs_array) { } return true; } + +bool HaveSameMinMax(const Array& lhs_array, const Array& rhs_array) { + if (lhs_array.minmax || rhs_array.minmax) { + if (!lhs_array.minmax || !rhs_array.minmax) { + return false; + } + if (!(*lhs_array.minmax == *rhs_array.minmax)) { + return false; + } + } + return true; +} + +bool HaveSameQuantizationParams(const Array& lhs_array, + const Array& rhs_array) { + if (lhs_array.quantization_params || rhs_array.quantization_params) { + if (!lhs_array.quantization_params || 
!rhs_array.quantization_params) { + return false; + } + if (!(*lhs_array.quantization_params == *rhs_array.quantization_params)) { + return false; + } + } + return true; +} + } // namespace bool CompareConstantArrays(const Array& lhs_array, const Array& rhs_array) { - bool attrs_equal = - lhs_array.shape() == rhs_array.shape() && - lhs_array.data_type == rhs_array.data_type && - lhs_array.final_data_type == rhs_array.final_data_type && - lhs_array.minmax == rhs_array.minmax && - lhs_array.quantization_params == rhs_array.quantization_params; + bool attrs_equal = lhs_array.shape() == rhs_array.shape() && + lhs_array.data_type == rhs_array.data_type && + lhs_array.final_data_type == rhs_array.final_data_type && + HaveSameMinMax(lhs_array, rhs_array) && + HaveSameQuantizationParams(lhs_array, rhs_array) && + lhs_array.narrow_range == rhs_array.narrow_range; if (!attrs_equal) { return false; } -- GitLab From 4e97b77576f89103073afd378e85c3c4b5dc17db Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Wed, 10 Oct 2018 18:10:32 -0700 Subject: [PATCH 255/411] [XLA::GPU] Explicitly use {lhs,rhs}_contracting_dimension from DotDimensionNumbers as the reduction dimension for Dot. PiperOrigin-RevId: 216627100 --- .../compiler/xla/service/gpu/ir_emitter.cc | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index 47102347cb..a3821e077e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -495,18 +495,10 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { TF_RET_CHECK(!ShapeUtil::IsScalar(lhs_shape) && !ShapeUtil::IsScalar(rhs_shape)); - // Reduce along the last dimension of the LHS and the second-to-last dimension - // of the RHS. Vectors are a special case where the reduction dimension is 0 - // for both LHS and RHS. This results in a vector dot product producing a - // scalar. 
- const int64 lhs_reduction_dimension = - ShapeUtil::GetDimensionNumber(lhs_shape, -1); - const int64 rhs_reduction_dimension = - ShapeUtil::Rank(rhs_shape) >= 2 + dnums.lhs_batch_dimensions_size() - ? ShapeUtil::GetDimensionNumber(rhs_shape, -2) - : dnums.lhs_batch_dimensions_size(); - - // Check that the batch dims don't cover the last two dims. + const int64 lhs_reduction_dimension = dnums.lhs_contracting_dimensions(0); + const int64 rhs_reduction_dimension = dnums.rhs_contracting_dimensions(0); + + // Check that the batch dims don't cover the reduction dimensions. for (int64 batch_dim : dnums.lhs_batch_dimensions()) { CHECK_NE(lhs_reduction_dimension, batch_dim); CHECK_NE(rhs_reduction_dimension, batch_dim); @@ -514,7 +506,11 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { // Verify the reduction dimension in the two operands are the same size. TF_RET_CHECK(lhs_shape.dimensions(lhs_reduction_dimension) == - rhs_shape.dimensions(rhs_reduction_dimension)); + rhs_shape.dimensions(rhs_reduction_dimension)) + << "lhs_shape.dimensions(" << lhs_reduction_dimension + << ") = " << lhs_shape.dimensions(lhs_reduction_dimension) + << ", and rhs_shape.dimensions(" << rhs_reduction_dimension + << ") = " << rhs_shape.dimensions(rhs_reduction_dimension); // Create loop nests which loop through the LHS operand dimensions and the RHS // operand dimensions. 
The reduction dimension of the LHS and RHS are handled -- GitLab From ba2d5c3a7c415205ac796740e8b52c7fec2c8ec7 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 10 Oct 2018 18:11:45 -0700 Subject: [PATCH 256/411] Automated rollback of commit 9bad98c61f27b60152119bb1c2cfd402c3bf7f3d PiperOrigin-RevId: 216627219 --- tensorflow/core/framework/model.cc | 32 ++++++------------- .../core/kernels/data/cache_dataset_ops.cc | 14 ++++---- .../assert_next_dataset_op_test.py | 12 +++++++ .../data/experimental/ops/prefetching_ops.py | 9 ------ tensorflow/python/data/ops/dataset_ops.py | 29 +++++++++-------- .../data/ops/multi_device_iterator_ops.py | 4 --- 6 files changed, 43 insertions(+), 57 deletions(-) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index 9684b736a7..bfdb3a6658 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -59,15 +59,9 @@ int64 Model::Node::ProcessingTimeLocked() { return NanosPerElementLocked() + batch_size * ProcessingTimeForInputs(); } case Type::FILTER: { - if (inputs_.size() <= 1) { - return NanosPerElementLocked(); - } std::shared_ptr input = inputs_.front(); - double ratio = 0.0L; - if (num_elements_ > 0) { - ratio = static_cast(input->num_elements()) / - static_cast(num_elements_); - } + double ratio = static_cast(input->num_elements()) / + static_cast(num_elements_); return NanosPerElementLocked() + static_cast(ratio * static_cast(ProcessingTimeForInputs())); @@ -121,21 +115,15 @@ int64 Model::Node::OutputTimeLocked(std::vector* input_times) { batch_size * OutputTimeForInputs(input_times); } case Type::FILTER: { - if (inputs_.size() <= 1) { - return NanosPerElementLocked(); - } std::shared_ptr input = inputs_.front(); - double ratio = 0.0L; - if (num_elements_ > 0) { - ratio = static_cast(input->num_elements()) / - static_cast(num_elements_); - int64 old_value = (*input_times)[input_times->size() - 1]; - (*input_times)[input_times->size() - 1] = static_cast( - 
static_cast(old_value + NanosPerElementLocked()) / ratio); - auto cleanup = gtl::MakeCleanup([input_times, old_value]() { - (*input_times)[input_times->size() - 1] = old_value; - }); - } + int64 old_value = (*input_times)[input_times->size() - 1]; + double ratio = static_cast(input->num_elements()) / + static_cast(num_elements_); + (*input_times)[input_times->size() - 1] = static_cast( + static_cast(old_value + NanosPerElementLocked()) / ratio); + auto cleanup = gtl::MakeCleanup([input_times, old_value]() { + (*input_times)[input_times->size() - 1] = old_value; + }); return NanosPerElementLocked() + static_cast( static_cast(OutputTimeForInputs(input_times)) * ratio); diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc index f2419db3dc..34c6c86538 100644 --- a/tensorflow/core/kernels/data/cache_dataset_ops.cc +++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc @@ -516,12 +516,10 @@ class CacheDatasetOp : public UnaryDatasetOpKernel { // `FileReaderIterator` and seek to the `cur_index`. 
switch (mode_) { case Mode::read: - iterator_.reset(new FileReaderIterator( - {dataset(), strings::StrCat(prefix(), "Impl")})); + iterator_.reset(new FileReaderIterator({dataset(), prefix()})); break; case Mode::write: - iterator_.reset(new FileWriterIterator( - {dataset(), strings::StrCat(prefix(), "Impl")})); + iterator_.reset(new FileWriterIterator({dataset(), prefix()})); } } @@ -868,12 +866,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel { void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) { switch (mode_) { case Mode::read: - iterator_.reset(new MemoryReaderIterator( - {dataset(), strings::StrCat(prefix(), "Impl")}, cache_)); + iterator_.reset( + new MemoryReaderIterator({dataset(), prefix()}, cache_)); break; case Mode::write: - iterator_.reset(new MemoryWriterIterator( - {dataset(), strings::StrCat(prefix(), "Impl")}, cache_)); + iterator_.reset( + new MemoryWriterIterator({dataset(), prefix()}, cache_)); } } diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py index a138436fff..45b77b5c20 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py @@ -48,6 +48,18 @@ class AssertNextDatasetTest(test_base.DatasetTestBase): "Map transformation instead."): sess.run(get_next) + def testAssertNextShort(self): + dataset = dataset_ops.Dataset.from_tensors(0).apply( + optimization.assert_next(["Map", "Whoops"])).map(lambda x: x) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + + with self.cached_session() as sess: + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + "Asserted next 2 transformations but encountered only 1."): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git 
a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py index df082e9e35..48d7136f95 100644 --- a/tensorflow/python/data/experimental/ops/prefetching_ops.py +++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py @@ -506,15 +506,6 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset): else: return super(_CopyToDeviceDataset, self).make_one_shot_iterator() - def make_initializable_iterator(self): - if self._is_gpu_target: - # TODO(b/116140813) : Enable dynamic optimizations. - options = dataset_ops.Options() - options.experimental_autotune = False - return self.with_options(options).make_initializable_iterator() - else: - return super(_CopyToDeviceDataset, self).make_initializable_iterator() - def _as_variant_tensor(self): with ops.device(self._target_device): return gen_dataset_ops.generator_dataset( diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index d7e37da48b..cdb883cac9 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -99,16 +99,6 @@ class Dataset(object): return options return Options() - def _apply_options(self): - dataset = self - options = self.options() - static_optimizations = options._static_optimizations() # pylint: disable=protected-access - if static_optimizations: - dataset = _OptimizeDataset(dataset, static_optimizations) - if options.experimental_autotune is not False: - dataset = _ModelDataset(dataset) - return dataset - def make_initializable_iterator(self, shared_name=None): """Creates an `Iterator` for enumerating the elements of this dataset. 
@@ -137,7 +127,13 @@ class Dataset(object): raise RuntimeError( "dataset.make_initializable_iterator is not supported when eager " "execution is enabled.") - dataset = self._apply_options() + dataset = self + options = self.options() + static_optimizations = options._static_optimizations() # pylint: disable=protected-access + if static_optimizations: + dataset = _OptimizeDataset(dataset, static_optimizations) + if options.experimental_autotune: + dataset = _ModelDataset(dataset) if shared_name is None: shared_name = "" if compat.forward_compatible(2018, 8, 3): @@ -167,8 +163,7 @@ class Dataset(object): RuntimeError: If eager execution is not enabled. """ if context.executing_eagerly(): - dataset = self._apply_options() - return iterator_ops.EagerIterator(dataset) + return iterator_ops.EagerIterator(self) else: raise RuntimeError("dataset.__iter__() is only supported when eager " "execution is enabled.") @@ -199,7 +194,13 @@ class Dataset(object): core_random_seed.set_random_seed( (graph_level_seed + 87654321 * op_level_seed) % (2 ** 63 - 1)) - dataset = self._apply_options() + dataset = self + options = self.options() + static_optimizations = options._static_optimizations() # pylint: disable=protected-access + if static_optimizations: + dataset = _OptimizeDataset(dataset, static_optimizations) + if options.experimental_autotune: + dataset = _ModelDataset(dataset) return dataset._as_variant_tensor() # pylint: disable=protected-access try: diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py index 3bcc20b333..b7d3aac206 100644 --- a/tensorflow/python/data/ops/multi_device_iterator_ops.py +++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py @@ -206,10 +206,6 @@ class MultiDeviceIterator(object): i, self._multi_device_iterator_resource, self._incarnation_id, self._source_device_tensor, device, self._dataset.output_shapes, self._dataset.output_types, self._dataset.output_classes) - # 
TODO(b/116140813) : Enable dynamic optimizations. - options = dataset_ops.Options() - options.experimental_autotune = False - ds = ds.with_options(options) if prefetch_buffer_size > 0: ds = ds.prefetch(prefetch_buffer_size) with ops.device(device): -- GitLab From 073c727871c6d4c8e696cb4b071e131f5550bb62 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 10 Oct 2018 18:41:23 -0700 Subject: [PATCH 257/411] [XLA:GPU] Use HloVerifiedModule in cudnn_conv_pad_for_tensor_cores_test. No functional change. PiperOrigin-RevId: 216629980 --- .../cudnn_conv_pad_for_tensor_cores_test.cc | 67 ++++++++++--------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc index 074f95698e..fa3afa6a5d 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc @@ -32,7 +32,7 @@ using ::testing::_; class CudnnConvPadForTensorCoresTest : public HloVerifiedTestBase {}; TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) { - ParseAndVerifyModule(R"( + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -41,11 +41,12 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) { ROOT result = (f16[10,20,30,40], u8[0]) custom-call(input, filter), window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" - })"); - EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); - SCOPED_TRACE(module().ToString()); + SCOPED_TRACE(module->ToString()); EXPECT_THAT(root, 
op::CustomCall(kCudnnConvForwardCallTarget, op::Pad(op::Parameter(0), _), op::Pad(op::Parameter(1), _))); @@ -56,7 +57,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) { } TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { - ParseAndVerifyModule(R"( + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -65,9 +66,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { ROOT result = (f16[10,20,30,40], u8[0]) custom-call(output, filter), window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardInput" - })"); - EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::CustomCall(kCudnnConvBackwardInputCallTarget, op::Pad(op::Parameter(0), _), op::Pad(op::Parameter(1), _))); @@ -78,7 +80,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { } TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) { - ParseAndVerifyModule(R"( + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -87,9 +89,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) { ROOT result = (f16[10,20,30,41], u8[0]) custom-call(input, filter), window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" - })"); - EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, 
op::Tuple(op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvForwardCallTarget, op::Parameter(0), op::Pad(op::Parameter(1), _)))), @@ -97,7 +100,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) { } TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { - ParseAndVerifyModule(R"( + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -107,9 +110,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardInput" ROOT gte = f16[10,20,30,41] get-tuple-element(result), index=0 - })"); - EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvBackwardInputCallTarget, op::Parameter(0), @@ -118,7 +122,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { } TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { - ParseAndVerifyModule(R"( + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -128,9 +132,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardFilter" ROOT gte = f16[2,2,41,40] get-tuple-element(result), index=0 - })"); - EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = 
module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvBackwardFilterCallTarget, @@ -139,7 +144,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { } TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { - ParseAndVerifyModule(R"( + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -149,9 +154,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardFilter" ROOT gte = f16[2,2,40,41] get-tuple-element(result), index=0 - })"); - EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvBackwardFilterCallTarget, @@ -160,7 +166,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { } TEST_F(CudnnConvPadForTensorCoresTest, PadInputFeatures3To4) { - ParseAndVerifyModule(R"( + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -169,11 +175,12 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadInputFeatures3To4) { ROOT result = (f16[10,20,30,32], u8[0]) custom-call(input, filter), window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" - })"); - EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = 
module->entry_computation()->root_instruction(); - SCOPED_TRACE(module().ToString()); + SCOPED_TRACE(module->ToString()); EXPECT_THAT(root, op::CustomCall(kCudnnConvForwardCallTarget, op::Pad(op::Parameter(0), _), op::Pad(op::Parameter(1), _))); -- GitLab From 9ef04f539932c286bcf6c9b05c06b5c3981bd892 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 10 Oct 2018 18:52:45 -0700 Subject: [PATCH 258/411] Disable signed-compare warnings in TensorFlow default build. We have a lot of such warnings and don't make an attempt to clean them up internally. PiperOrigin-RevId: 216631010 --- configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.py b/configure.py index 89dc79b6b6..07c6e4108e 100644 --- a/configure.py +++ b/configure.py @@ -497,7 +497,7 @@ def set_cc_opt_flags(environ_cp): elif is_windows(): default_cc_opt_flags = '/arch:AVX' else: - default_cc_opt_flags = '-march=native' + default_cc_opt_flags = '-march=native -Wno-sign-compare' question = ('Please specify optimization flags to use during compilation when' ' bazel option "--config=opt" is specified [Default is %s]: ' ) % default_cc_opt_flags -- GitLab From 9153b897c4dfb8685d78397e22c1acd5ff24d40a Mon Sep 17 00:00:00 2001 From: James Qin Date: Wed, 10 Oct 2018 19:09:31 -0700 Subject: [PATCH 259/411] Fp16 LSTMBlocKCell and LSTMBlockFusedCell PiperOrigin-RevId: 216632480 --- tensorflow/contrib/rnn/kernels/blas_gemm.cc | 7 +- tensorflow/contrib/rnn/kernels/blas_gemm.h | 9 +- tensorflow/contrib/rnn/kernels/lstm_ops.cc | 163 +++++++++--------- tensorflow/contrib/rnn/kernels/lstm_ops.h | 34 ++-- .../contrib/rnn/kernels/lstm_ops_gpu.cu.cc | 80 +++++++-- tensorflow/contrib/rnn/ops/lstm_ops.cc | 8 +- tensorflow/contrib/rnn/python/ops/lstm_ops.py | 5 +- 7 files changed, 185 insertions(+), 121 deletions(-) diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.cc b/tensorflow/contrib/rnn/kernels/blas_gemm.cc index 45d22b739b..56ec86418d 100644 --- 
a/tensorflow/contrib/rnn/kernels/blas_gemm.cc +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.cc @@ -38,8 +38,9 @@ namespace functor { template void TensorCuBlasGemm::operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m, uint64 n, uint64 k, - T alpha, const T* a, int lda, const T* b, - int ldb, T beta, T* c, int ldc) { + float alpha, const T* a, int lda, + const T* b, int ldb, float beta, T* c, + int ldc) { #if GOOGLE_CUDA se::blas::Transpose trans[] = {se::blas::Transpose::kNoTranspose, se::blas::Transpose::kTranspose}; @@ -60,8 +61,8 @@ void TensorCuBlasGemm::operator()(OpKernelContext* ctx, bool transa, #endif } +template struct TensorCuBlasGemm; template struct TensorCuBlasGemm; -template struct TensorCuBlasGemm; } // end namespace functor } // end namespace tensorflow diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.h b/tensorflow/contrib/rnn/kernels/blas_gemm.h index a52c934233..9535a76566 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.h +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.h @@ -28,8 +28,8 @@ namespace functor { template struct TensorCuBlasGemm { void operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m, - uint64 n, uint64 k, T alpha, const T* a, int lda, const T* b, - int ldb, T beta, T* c, int ldc); + uint64 n, uint64 k, float alpha, const T* a, int lda, + const T* b, int ldb, float beta, T* c, int ldc); }; template @@ -38,8 +38,9 @@ struct TensorBlasGemm; template struct TensorBlasGemm { static void compute(OpKernelContext* ctx, const Device& d, bool transa, - bool transb, T alpha, typename TTypes::ConstMatrix a, - typename TTypes::ConstMatrix b, T beta, + bool transb, float alpha, + typename TTypes::ConstMatrix a, + typename TTypes::ConstMatrix b, float beta, typename TTypes::Matrix c) { int64 m = c.dimensions()[0]; int64 n = c.dimensions()[1]; diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.cc b/tensorflow/contrib/rnn/kernels/lstm_ops.cc index 5e7cf0ce84..ee08d306f8 100644 --- 
a/tensorflow/contrib/rnn/kernels/lstm_ops.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.cc @@ -44,7 +44,7 @@ namespace functor { template void LSTMBlockCellFpropWithEigen( const LSTMBlockCell& cell, OpKernelContext* ctx, const CPUDevice& d, - const T forget_bias, const T cell_clip, bool use_peephole, + const float forget_bias, const float cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, @@ -177,50 +177,51 @@ void LSTMBlockCellBpropWithEigen( } } -#define DEFINE_CPU_SPECS(T) \ - template <> \ - void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, const CPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ - typename TTypes::Matrix i, typename TTypes::Matrix cs, \ - typename TTypes::Matrix f, typename TTypes::Matrix o, \ - typename TTypes::Matrix ci, typename TTypes::Matrix co, \ - typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ - LSTMBlockCellFpropWithEigen( \ - *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev, \ - h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h); \ - } \ - template <> \ - void LSTMBlockCellBprop::operator()( \ - OpKernelContext* ctx, const CPUDevice& d, bool use_peephole, \ - typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::ConstMatrix i, \ - 
typename TTypes::ConstMatrix cs, typename TTypes::ConstMatrix f, \ - typename TTypes::ConstMatrix o, typename TTypes::ConstMatrix ci, \ - typename TTypes::ConstMatrix co, \ - typename TTypes::ConstMatrix cs_grad, \ - typename TTypes::ConstMatrix h_grad, typename TTypes::Matrix do_, \ - typename TTypes::Matrix dcs, typename TTypes::Matrix dci, \ - typename TTypes::Matrix df, typename TTypes::Matrix di, \ - typename TTypes::Matrix dicfo, \ - typename TTypes::Matrix cs_prev_grad, \ - typename TTypes::Vec wci_grad, typename TTypes::Vec wcf_grad, \ - typename TTypes::Vec wco_grad) { \ - LSTMBlockCellBpropWithEigen( \ - *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b, \ - i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo, \ - cs_prev_grad, wci_grad, wcf_grad, wco_grad); \ - } \ - template struct LSTMBlockCellFprop; \ +#define DEFINE_CPU_SPECS(T) \ + template <> \ + void LSTMBlockCellFprop::operator()( \ + OpKernelContext* ctx, const CPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ + typename TTypes::Matrix i, typename TTypes::Matrix cs, \ + typename TTypes::Matrix f, typename TTypes::Matrix o, \ + typename TTypes::Matrix ci, typename TTypes::Matrix co, \ + typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ + LSTMBlockCellFpropWithEigen( \ + *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev, \ + h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h); \ + } \ + template <> \ + void LSTMBlockCellBprop::operator()( \ + OpKernelContext* ctx, const CPUDevice& d, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + 
typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::ConstMatrix i, \ + typename TTypes::ConstMatrix cs, typename TTypes::ConstMatrix f, \ + typename TTypes::ConstMatrix o, typename TTypes::ConstMatrix ci, \ + typename TTypes::ConstMatrix co, \ + typename TTypes::ConstMatrix cs_grad, \ + typename TTypes::ConstMatrix h_grad, typename TTypes::Matrix do_, \ + typename TTypes::Matrix dcs, typename TTypes::Matrix dci, \ + typename TTypes::Matrix df, typename TTypes::Matrix di, \ + typename TTypes::Matrix dicfo, \ + typename TTypes::Matrix cs_prev_grad, \ + typename TTypes::Vec wci_grad, typename TTypes::Vec wcf_grad, \ + typename TTypes::Vec wco_grad) { \ + LSTMBlockCellBpropWithEigen( \ + *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b, \ + i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo, \ + cs_prev_grad, wci_grad, wcf_grad, wco_grad); \ + } \ + template struct LSTMBlockCellFprop; \ template struct LSTMBlockCellBprop; DEFINE_CPU_SPECS(float); @@ -377,24 +378,26 @@ REGISTER_KERNEL(float); #if GOOGLE_CUDA namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ - typename TTypes::Matrix i, typename TTypes::Matrix cs, \ - typename TTypes::Matrix f, typename TTypes::Matrix o, \ - typename TTypes::Matrix ci, typename TTypes::Matrix co, \ - typename TTypes::Matrix icfo, typename TTypes::Matrix h); \ - \ 
+#define DECLARE_GPU_SPEC(T) \ + template <> \ + void LSTMBlockCellFprop::operator()( \ + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ + typename TTypes::Matrix i, typename TTypes::Matrix cs, \ + typename TTypes::Matrix f, typename TTypes::Matrix o, \ + typename TTypes::Matrix ci, typename TTypes::Matrix co, \ + typename TTypes::Matrix icfo, typename TTypes::Matrix h); \ + \ extern template struct LSTMBlockCellFprop; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -405,6 +408,7 @@ DECLARE_GPU_SPEC(float); LSTMBlockCellOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -629,9 +633,9 @@ class LSTMBlockCellGradOp : public OpKernel { const Device& device = ctx->eigen_device(); - functor::TensorZero()(device, wci_grad_tensor->flat()); - functor::TensorZero()(device, wcf_grad_tensor->flat()); - functor::TensorZero()(device, wco_grad_tensor->flat()); + functor::TensorZero()(device, wci_grad_tensor->flat()); + functor::TensorZero()(device, wcf_grad_tensor->flat()); + functor::TensorZero()(device, wco_grad_tensor->flat()); functor::LSTMBlockCellBprop(batch_size, input_size, cell_size)( @@ -688,6 +692,7 @@ namespace functor { true /* USE_CUBLAS */>; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // namespace functor @@ -698,6 +703,7 @@ DECLARE_GPU_SPEC(float); LSTMBlockCellGradOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // 
REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -984,10 +990,10 @@ class BlockLSTMOp : public OpKernel { Tensor cs_tensor = cs_out->Slice(seq_len_max, timelen); Tensor h_tensor = h_out->Slice(seq_len_max, timelen); - functor::TensorUnalignedZero()( - device, cs_tensor.unaligned_flat()); - functor::TensorUnalignedZero()( - device, h_tensor.unaligned_flat()); + functor::TensorUnalignedZero()(device, + cs_tensor.unaligned_flat()); + functor::TensorUnalignedZero()(device, + h_tensor.unaligned_flat()); } } @@ -1021,6 +1027,7 @@ namespace functor { extern template struct TensorUnalignedZero; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -1033,6 +1040,7 @@ DECLARE_GPU_SPEC(float); BlockLSTMOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -1195,16 +1203,15 @@ class BlockLSTMGradOp : public OpKernel { const Device& device = ctx->eigen_device(); - functor::TensorZero()(device, cs_grad_tensor.flat()); - functor::TensorZero()(device, - cs_prev_grad_tensor->flat()); - functor::TensorZero()(device, h_grad_tensor.flat()); - functor::TensorZero()(device, h_prev_grad_tensor->flat()); - functor::TensorZero()(device, w_grad_tensor->flat()); - functor::TensorZero()(device, wci_grad_tensor->flat()); - functor::TensorZero()(device, wcf_grad_tensor->flat()); - functor::TensorZero()(device, wco_grad_tensor->flat()); - functor::TensorZero()(device, b_grad_tensor->flat()); + functor::TensorZero()(device, cs_grad_tensor.flat()); + functor::TensorZero()(device, cs_prev_grad_tensor->flat()); + functor::TensorZero()(device, h_grad_tensor.flat()); + functor::TensorZero()(device, h_prev_grad_tensor->flat()); + functor::TensorZero()(device, w_grad_tensor->flat()); + functor::TensorZero()(device, wci_grad_tensor->flat()); + functor::TensorZero()(device, 
wcf_grad_tensor->flat()); + functor::TensorZero()(device, wco_grad_tensor->flat()); + functor::TensorZero()(device, b_grad_tensor->flat()); const int64 seq_len_max = seq_len_max_tensor->scalar()(); SliceHelper slicer(ctx); @@ -1331,6 +1338,7 @@ namespace functor { extern template struct BlockLSTMBprop; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -1343,6 +1351,7 @@ DECLARE_GPU_SPEC(float); BlockLSTMGradOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.h b/tensorflow/contrib/rnn/kernels/lstm_ops.h index d23cedc234..5ca1dad655 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.h +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.h @@ -77,8 +77,7 @@ template struct TensorZeroPadding { void operator()(const Device& d, const int64 time_idx, typename TTypes::ConstVec seq_len, - typename TTypes::Vec mask, - typename TTypes::Matrix m) { + typename TTypes::Vec mask, typename TTypes::Matrix m) { // mask is shape [batch_size]. 
mask.device(d) = seq_len.constant(time_idx) < seq_len; @@ -154,18 +153,21 @@ struct LSTMBlockCellFprop : public LSTMBlockCell { const int cell_size) : LSTMBlockCell(batch_size, input_size, cell_size) {} - void operator()( - OpKernelContext* ctx, const Device& d, const T forget_bias, - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, - typename TTypes::ConstMatrix cs_prev, - typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, - typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, - typename TTypes::ConstVec wco, typename TTypes::ConstVec b, - typename TTypes::Matrix xh, typename TTypes::Matrix i, - typename TTypes::Matrix cs, typename TTypes::Matrix f, - typename TTypes::Matrix o, typename TTypes::Matrix ci, - typename TTypes::Matrix co, typename TTypes::Matrix icfo, - typename TTypes::Matrix h); + void operator()(OpKernelContext* ctx, const Device& d, + const float forget_bias, const float cell_clip, + bool use_peephole, typename TTypes::ConstMatrix x, + typename TTypes::ConstMatrix cs_prev, + typename TTypes::ConstMatrix h_prev, + typename TTypes::ConstMatrix w, + typename TTypes::ConstVec wci, + typename TTypes::ConstVec wcf, + typename TTypes::ConstVec wco, + typename TTypes::ConstVec b, typename TTypes::Matrix xh, + typename TTypes::Matrix i, typename TTypes::Matrix cs, + typename TTypes::Matrix f, typename TTypes::Matrix o, + typename TTypes::Matrix ci, typename TTypes::Matrix co, + typename TTypes::Matrix icfo, + typename TTypes::Matrix h); }; // See lstm_ops.cc for CPUDevice implementation and lstm_ops_gpu.cu.cc for @@ -261,7 +263,7 @@ struct BlockLSTMBprop : public LSTMBlockCell { typename TTypes::ConstMatrix const_dicfo(dicfo.data(), dicfo.dimensions()); TensorBlasGemm::compute( - ctx, d, false, true, T(1), const_dicfo, w, T(0), xh_grad); + ctx, d, false, true, 1.f, const_dicfo, w, 0.f, xh_grad); // xh. 
xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x; @@ -274,7 +276,7 @@ struct BlockLSTMBprop : public LSTMBlockCell { // w_grad. TensorBlasGemm::compute( - ctx, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad); + ctx, d, true, false, 1.f, const_xh, const_dicfo, 1.f, w_grad); // b_grad. b_grad.device(d) += dicfo.sum(Eigen::array({0})); diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc index 6d3758fef1..b664b0f45e 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc @@ -31,6 +31,49 @@ typedef Eigen::GpuDevice GPUDevice; namespace { +struct FloatToHalf { + __host__ __device__ EIGEN_STRONG_INLINE Eigen::half operator()( + const float& x) const { + return Eigen::half_impl::float_to_half_rtne(x); + } +}; + +template +__host__ __device__ EIGEN_STRONG_INLINE + typename std::enable_if::value, U>::type + strict_cast(T t); + +template +__host__ __device__ EIGEN_STRONG_INLINE + typename std::enable_if::value, U>::type + strict_cast(T t) { + return t; +} + +template <> +__host__ __device__ EIGEN_STRONG_INLINE Eigen::half +strict_cast(float t) { + return FloatToHalf()(t); +} + +} // namespace + +template +struct TensorZero { + void operator()(const GPUDevice& d, typename TTypes::Flat t) { + t.device(d) = t.constant(strict_cast(0.f)); + } +}; + +template +struct TensorUnalignedZero { + void operator()(const GPUDevice& d, typename TTypes::UnalignedFlat t) { + t.device(d) = t.constant(strict_cast(0.f)); + } +}; + +namespace { + // Adds bias, applies non-linearities and gates. 
// // Launch with a 2D setup such that there is one thread per (example, @@ -42,12 +85,15 @@ namespace { template __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev, const T* wci, const T* wcf, const T* wco, T* o, T* h, - T* ci, T* cs, T* co, T* i, T* f, const T forget_bias, - const T cell_clip, const int batch_size, - const int cell_size) { + T* ci, T* cs, T* co, T* i, T* f, + const float forget_bias, const float cell_clip, + const int batch_size, const int cell_size) { const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; const int act_id = blockIdx.y * blockDim.y + threadIdx.y; + T forget_bias_t = strict_cast(forget_bias); + T cell_clip_t = strict_cast(cell_clip); + if (batch_id >= batch_size || act_id >= cell_size) return; // The following code assumes the input arrays are of the following @@ -115,16 +161,16 @@ __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev, T f_local; if (use_peephole) { f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] + - forget_bias + cs_prev[cid] * wcf[act_id]); + forget_bias_t + cs_prev[cid] * wcf[act_id]); } else { f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] + - forget_bias); + forget_bias_t); } f[cid] = f_local; T cs_local = i_local * ci_local + f_local * cs_prev[cid]; - if (cell_clip > 0.0) { - cs_local = clip_op(cs_local, cell_clip); + if (cell_clip_t > strict_cast(0.0f)) { + cs_local = clip_op(cs_local, cell_clip_t); } cs[cid] = cs_local; @@ -174,8 +220,8 @@ __global__ void concat_xh(T* xh, const T* x, const T* h_prev, template void LSTMBlockCellFpropWithCUDA( - OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, + const float cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, typename TTypes::ConstMatrix h_prev, typename 
TTypes::ConstMatrix w, typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, @@ -202,7 +248,7 @@ void LSTMBlockCellFpropWithCUDA( // states1 = xh * w typename TTypes::ConstMatrix const_xh(xh.data(), xh.dimensions()); TensorBlasGemm::compute( - ctx, d, false, false, T(1), const_xh, w, T(0), icfo); + ctx, d, false, false, 1.f, const_xh, w, 0.f, icfo); // Add bias, apply non-linearities and gating. // @@ -357,8 +403,9 @@ void LSTMBlockCellBpropWithCUDA( template struct TensorAdd; \ template <> \ void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ typename TTypes::ConstMatrix cs_prev, \ typename TTypes::ConstMatrix h_prev, \ typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ @@ -368,10 +415,10 @@ void LSTMBlockCellBpropWithCUDA( typename TTypes::Matrix f, typename TTypes::Matrix o, \ typename TTypes::Matrix ci, typename TTypes::Matrix co, \ typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ - LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, use_peephole, \ - x, cs_prev, h_prev, w, wci, wcf, wco, b, xh, i, \ - cs, f, o, ci, co, icfo, h, batch_size_, \ - cell_size_, input_size_); \ + LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, \ + use_peephole, x, cs_prev, h_prev, w, wci, \ + wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, \ + h, batch_size_, cell_size_, input_size_); \ } \ template <> \ void LSTMBlockCellBprop::operator()( \ @@ -403,6 +450,7 @@ void LSTMBlockCellBpropWithCUDA( template struct BlockLSTMBprop; DEFINE_GPU_SPECS(float); +DEFINE_GPU_SPECS(Eigen::half); // DEFINE_GPU_SPECS(double); #undef DEFINE_GPU_SPECS diff --git a/tensorflow/contrib/rnn/ops/lstm_ops.cc b/tensorflow/contrib/rnn/ops/lstm_ops.cc index 699cc6c88a..1679e35518 100644 
--- a/tensorflow/contrib/rnn/ops/lstm_ops.cc +++ b/tensorflow/contrib/rnn/ops/lstm_ops.cc @@ -41,7 +41,7 @@ REGISTER_OP("LSTMBlockCell") .Attr("forget_bias: float = 1.0") .Attr("cell_clip: float = 3.0") .Attr("use_peephole: bool = false") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x)); @@ -128,7 +128,7 @@ REGISTER_OP("LSTMBlockCellGrad") .Output("wcf_grad: T") .Output("wco_grad: T") .Attr("use_peephole: bool") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x)); @@ -196,7 +196,7 @@ REGISTER_OP("BlockLSTM") .Attr("forget_bias: float = 1.0") .Attr("cell_clip: float = 3.0") .Attr("use_peephole: bool = false") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, b; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x)); @@ -288,7 +288,7 @@ REGISTER_OP("BlockLSTMGrad") .Output("wco_grad: T") .Output("b_grad: T") .Attr("use_peephole: bool") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev, h_prev, w, wci, wco, wcf, b; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x)); diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py index 9e61fc54d1..f645165efe 100644 --- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py +++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py @@ -596,6 +596,7 @@ class LSTMBlockFusedCell(LSTMBlockWrapper): cell_clip=None, use_peephole=False, reuse=None, + dtype=None, name="lstm_fused_cell"): """Initialize the LSTM cell. @@ -607,12 +608,14 @@ class LSTMBlockFusedCell(LSTMBlockWrapper): reuse: (optional) boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. 
+ dtype: the dtype of variables of this layer. name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. By default this is "lstm_cell", for variable-name compatibility with `tf.nn.rnn_cell.LSTMCell`. """ - super(LSTMBlockFusedCell, self).__init__(_reuse=reuse, name=name) + super(LSTMBlockFusedCell, self).__init__( + _reuse=reuse, name=name, dtype=dtype) self._num_units = num_units self._forget_bias = forget_bias self._cell_clip = cell_clip if cell_clip is not None else -1 -- GitLab From 905deeaadd41d529461d8a6666e9cf46f0097a8f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 19:19:07 -0700 Subject: [PATCH 260/411] Automated rollback of commit 3abfe2cd9befa263de57edfae7d4c0d29c9c9182 PiperOrigin-RevId: 216633097 --- .../xla/service/compile_only_service.cc | 2 - .../compiler/xrt/kernels/xrt_compile_ops.cc | 19 +--- .../compiler/xrt/kernels/xrt_execute_op.cc | 8 ++ .../compiler/xrt/ops/xrt_compile_ops.cc | 7 +- tensorflow/compiler/xrt/tests/BUILD | 13 +-- tensorflow/compiler/xrt/tests/raw_api_test.cc | 106 +----------------- 6 files changed, 20 insertions(+), 135 deletions(-) diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index bd5045b9b9..96bd2616f5 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -89,8 +89,6 @@ CompileOnlyService::CompileAheadOfTime( const auto& program_shape = instance.computation.program_shape(); ExecutionOptions execution_options; *execution_options.mutable_debug_options() = debug_options; - *execution_options.mutable_shape_with_output_layout() = - *instance.result_layout; TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, CreateModuleConfig(program_shape, instance.argument_layouts, diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc 
b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc index 1ab836a496..1d4f8d97f2 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc @@ -166,21 +166,10 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) { VLOG(1) << "Compiling XLA executable"; return Compile(ctx, computation_proto, program); })); - std::unique_ptr entry; - OP_REQUIRES_OK(ctx, cache->Lookup(uid, &entry)); - - Tensor handle_output(DT_INT64, TensorShape({})); - handle_output.scalar()() = uid; - ctx->set_output(0, handle_output); - - xla::LocalExecutable* executable = entry->get().get_executable(); - xla::ProgramShape program_shape = executable->executable() - ->module() - .entry_computation() - ->ComputeProgramShape(); - Tensor program_shape_output(DT_STRING, TensorShape({1})); - program_shape_output.vec()(0) = program_shape.SerializeAsString(); - ctx->set_output(1, program_shape_output); + + Tensor output(DT_INT64, TensorShape({})); + output.scalar()() = uid; + ctx->set_output(0, output); } XRTCompileOp::~XRTCompileOp() = default; diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc index 3a1e03280a..257b054f16 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc @@ -64,6 +64,14 @@ uint32 GetXLARandomSeed() { return counter.fetch_add(2); } +// Looks up the input `key` in the compilation cache. +Status GetComputationCacheEntry( + XRTCompilationCache* cache, int64 key, + std::unique_ptr* entry) { + TF_RETURN_IF_ERROR(cache->Lookup(key, entry)); + return Status::OK(); +} + // Populates `inputs` with the input tensors to the computation. 
Status GetComputationInputs(OpKernelContext* context, ResourceMgr* rm, bool release_inputs, diff --git a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc index 7b3b50c695..5cfc8711f9 100644 --- a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc @@ -23,12 +23,7 @@ namespace tensorflow { REGISTER_OP("XRTCompile") .Input("computation: string") .Output("handle: int64") - .Output("program_shape: string") - .SetShapeFn([](shape_inference::InferenceContext* c) { - c->set_output(0, c->Scalar()); - c->set_output(1, c->UnknownShapeOfRank(1)); - return Status::OK(); - }) + .SetShapeFn(tensorflow::shape_inference::ScalarShape) .Doc( R"( Reads a computation proto, compiles it, and places it in the global compilation diff --git a/tensorflow/compiler/xrt/tests/BUILD b/tensorflow/compiler/xrt/tests/BUILD index be44a3474a..b6dcfc4eb9 100644 --- a/tensorflow/compiler/xrt/tests/BUILD +++ b/tensorflow/compiler/xrt/tests/BUILD @@ -29,11 +29,8 @@ cc_library( "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_computation", - "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xrt:xrt_proto", "//tensorflow/compiler/xrt:xrt_server", "//tensorflow/compiler/xrt/cc:xrt_ops", @@ -52,10 +49,7 @@ tf_cc_test( name = "raw_api_test_cpu", size = "medium", srcs = [], - args = [ - "--xla_test_device=XLA_CPU", - "--xla_platform=CPU", - ], + args = ["--xla_test_device=XLA_CPU"], deps = [ ":raw_api_test_lib", "//tensorflow/compiler/jit:xla_cpu_device", @@ -66,10 +60,7 @@ tf_cuda_cc_test( name = "raw_api_test_gpu", size = "medium", srcs = [], - args = [ - "--xla_test_device=XLA_GPU", - "--xla_platform=GPU", - ], + args = 
["--xla_test_device=XLA_GPU"], tags = tf_cuda_tests_tags(), deps = [ ":raw_api_test_lib", diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index ee6734020d..9fc01e6304 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -22,13 +22,10 @@ limitations under the License. #include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h" @@ -46,7 +43,6 @@ namespace tensorflow { namespace { string* xla_test_device_ptr; // initial value set in main() -string* xla_platform_ptr; // initial value set in main() string DeviceFromFlag() { string xla_test_device = *xla_test_device_ptr; @@ -149,28 +145,6 @@ void StoreComputationSnapshot(const xla::XlaComputation& computation, *dst = *snapshot; } -xla::ProgramShape XlaCompiledProgramShape( - const xla::XlaComputation& computation, - const xla::ProgramShape& input_program_shape) { - se::Platform* platform = - xla::PlatformUtil::GetPlatform(*xla_platform_ptr).ValueOrDie(); - xla::LocalClient* client = - xla::ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie(); - xla::ExecutableBuildOptions exec_options; - exec_options.set_result_layout(input_program_shape.result()); - std::vector parameters_shapes; - for (int64 i = 0; i < input_program_shape.parameters_size(); ++i) { - 
parameters_shapes.push_back(&input_program_shape.parameters(i)); - } - auto local_executable = - client->Compile(computation, parameters_shapes, exec_options) - .ValueOrDie(); - return local_executable->executable() - ->module() - .entry_computation() - ->ComputeProgramShape(); -} - TEST(RawApiTest, ReadAndWriteState) { xrt::XLAAllocation alloc; alloc.set_device_ordinal(0); @@ -364,87 +338,20 @@ TEST(RawApiTest, CompileAndExecute) { auto p1_value = ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); auto p1_handle = ops::XRTAllocate(root, p1_value); - auto result = ops::XRTExecute(root, c_handle.handle, e_config, + auto result = ops::XRTExecute(root, c_handle, e_config, {Output(p0_handle), Output(p1_handle)}); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); + TF_EXPECT_OK(session.Run({read_back}, &outputs)); xla::LiteralProto response; EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); auto expected = xla::LiteralUtil::CreateR1({27.0f, 21.0f}); EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); - - xla::ProgramShape program_shape; - EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec()(0))); - EXPECT_EQ(program_shape.parameters_size(), 2); -} - -TEST(RawApiTest, CompileWithXlaReturnShapes) { - xla::XlaBuilder builder("XrtXlaShapes"); - auto input_shape = xla::ShapeUtil::MakeShape(xla::BF16, {32, 3, 128, 128}); - auto kernel_shape = xla::ShapeUtil::MakeShape(xla::BF16, {3, 3, 5, 5}); - // Clear layouts to signal XLA we are ready to get whatever are coming out of - // the compilation process. 
- xla::LayoutUtil::ClearLayout(&input_shape); - xla::LayoutUtil::ClearLayout(&kernel_shape); - auto param_shape = - xla::ShapeUtil::MakeTupleShape({input_shape, kernel_shape}); - auto param = xla::Parameter(&builder, 0, param_shape, "param"); - auto input = xla::GetTupleElement(param, 0); - auto kernel = xla::GetTupleElement(param, 1); - xla::Conv(input, kernel, {1, 1}, xla::Padding::kSame); - TF_ASSERT_OK_AND_ASSIGN(xla::XlaComputation xla_computation, builder.Build()); - - auto result_shape = xla_computation.GetProgramShape().ValueOrDie().result(); - // Clear the result shape layout to tell XLA we are accepting whatever are - // coming out of the compilation process. - xla::LayoutUtil::ClearLayout(&result_shape); - - xrt::XLAComputation c; - auto config = c.mutable_config(); - auto shapes = config->mutable_program_shape(); - *shapes->add_parameters() = param_shape; - *shapes->mutable_result() = result_shape; - StoreComputationSnapshot(xla_computation, c.mutable_hlo_snapshot()); - - Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); - auto computation = - ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); - auto c_handle = ops::XRTCompile(root, computation); - auto release = ops::XRTReleaseCompilationHandle(root, c_handle.handle); - TF_ASSERT_OK(root.status()); - - ClientSession session(root); - std::vector outputs; - TF_EXPECT_OK(session.Run(tensorflow::ClientSession::FeedType(), - {c_handle.program_shape}, {release}, &outputs)); - - xla::ProgramShape program_shape; - EXPECT_TRUE(program_shape.ParseFromString(outputs[0].vec()(0))); - EXPECT_EQ(program_shape.parameters_size(), 1); - - VLOG(2) << "Param: " - << xla::ShapeUtil::HumanStringWithLayout(program_shape.parameters(0)); - VLOG(2) << "Result: " - << xla::ShapeUtil::HumanStringWithLayout(program_shape.result()); - - xla::ProgramShape xla_program_shape = - XlaCompiledProgramShape(xla_computation, *shapes); - EXPECT_TRUE(xla::LayoutUtil::Equal( - 
xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {0}).layout(), - xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {0}) - .layout())); - EXPECT_TRUE(xla::LayoutUtil::Equal( - xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {1}).layout(), - xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {1}) - .layout())); - EXPECT_TRUE(xla::LayoutUtil::Equal(program_shape.result().layout(), - xla_program_shape.result().layout())); } TEST(RawApiTest, CompileAndExecuteZeroArg) { @@ -464,7 +371,7 @@ TEST(RawApiTest, CompileAndExecuteZeroArg) { auto computation = ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); auto c_handle = ops::XRTCompile(root, computation); - auto result = ops::XRTExecute(root, c_handle.handle, e_config, + auto result = ops::XRTExecute(root, c_handle, e_config, std::initializer_list({})); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); @@ -513,7 +420,7 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) { auto p1_value = ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); auto p1_handle = ops::XRTAllocate(root, p1_value); - auto result = ops::XRTExecute(root, c_handle.handle, e_config, + auto result = ops::XRTExecute(root, c_handle, e_config, {Output(p0_handle), Output(p1_handle)}); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); @@ -548,7 +455,7 @@ TEST(RawApiTest, LeakCompilationReference) { ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({c_handle.handle}, &outputs)); + TF_EXPECT_OK(session.Run({c_handle}, &outputs)); } } // namespace @@ -557,12 +464,9 @@ TEST(RawApiTest, LeakCompilationReference) { int main(int argc, char** argv) { tensorflow::xla_test_device_ptr = new tensorflow::string("XLA_CPU"); - tensorflow::xla_platform_ptr = new tensorflow::string("CPU"); std::vector flag_list = { tensorflow::Flag("xla_test_device", tensorflow::xla_test_device_ptr, "Tensorflow 
device type to use for test, e.g., XLA_CPU"), - tensorflow::Flag("xla_platform", tensorflow::xla_platform_ptr, - "The XLA platform to select for the device"), }; tensorflow::string usage = tensorflow::Flags::Usage(argv[0], flag_list); const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); -- GitLab From 0f79b5ff0d557abe5f6b21da10ae5a2570fd21d9 Mon Sep 17 00:00:00 2001 From: Andy Craze Date: Wed, 10 Oct 2018 19:41:32 -0700 Subject: [PATCH 261/411] Update momentum.py fix docstring line lengths --- tensorflow/python/training/momentum.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/training/momentum.py b/tensorflow/python/training/momentum.py index 34c74cda4e..4a280e7c51 100644 --- a/tensorflow/python/training/momentum.py +++ b/tensorflow/python/training/momentum.py @@ -60,9 +60,10 @@ class MomentumOptimizer(optimizer.Optimizer): variable(s) passed to the optimizer. Using Nesterov Momentum makes the variable(s) track the values called `theta_t + mu*v_t` in the paper. This implementation is an approximation of the original formula, valid - for high values of momentum. It will compute the "adjusted gradient" in NAG - by assuming that the new gradient will be estimated by the current - average gradient plus the product of momentum and the change in the average gradient. + for high values of momentum. It will compute the "adjusted gradient" + in NAG by assuming that the new gradient will be estimated by the + current average gradient plus the product of momentum and the change + in the average gradient. @compatibility(eager) When eager execution is enabled, `learning_rate` and `momentum` can each be -- GitLab From 2b8f59243e412d58ec88f12399478b6ff022b3a9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 10 Oct 2018 20:32:01 -0700 Subject: [PATCH 262/411] Add Range to schema. 
PiperOrigin-RevId: 216638239 --- tensorflow/contrib/lite/builtin_ops.h | 1 + .../lite/core/api/flatbuffer_conversions.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 5 + .../contrib/lite/schema/schema_generated.h | 124 +++++++++++++++++- 5 files changed, 126 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 6117cbf9f1..eb26c2dbdb 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -121,6 +121,7 @@ typedef enum { kTfLiteBuiltinZerosLike = 93, kTfLiteBuiltinFill = 94, kTfLiteBuiltinFloorMod = 95, + kTfLiteBuiltinRange = 96, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc index 890d9c04bb..348ce54dd7 100644 --- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc @@ -652,6 +652,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_ZEROS_LIKE: case BuiltinOperator_FILL: case BuiltinOperator_FLOOR_MOD: + case BuiltinOperator_RANGE: break; } return kTfLiteOk; diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index c7005eb53e..31f2333520 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -680,6 +680,7 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_ZEROS_LIKE: case tflite::BuiltinOperator_FILL: case tflite::BuiltinOperator_FLOOR_MOD: + case tflite::BuiltinOperator_RANGE: logError("Op code %d is currently not delegated to NNAPI", builtin); return kTfLiteError; break; diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 2b36209e5f..73805b7618 100644 --- 
a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -177,6 +177,7 @@ enum BuiltinOperator : byte { ZEROS_LIKE = 93, FILL = 94, FLOOR_MOD = 95, + RANGE = 96, } // Options for the builtin operators. @@ -253,6 +254,7 @@ union BuiltinOptions { BidirectionalSequenceRNNOptions, UnidirectionalSequenceLSTMOptions, FloorModOptions, + RangeOptions, } enum Padding : byte { SAME, VALID } @@ -623,6 +625,9 @@ table FillOptions { table FloorModOptions { } +table RangeOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 3aaa99ec55..4426b7d407 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -244,6 +244,9 @@ struct FillOptionsT; struct FloorModOptions; struct FloorModOptionsT; +struct RangeOptions; +struct RangeOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -405,11 +408,12 @@ enum BuiltinOperator { BuiltinOperator_ZEROS_LIKE = 93, BuiltinOperator_FILL = 94, BuiltinOperator_FLOOR_MOD = 95, + BuiltinOperator_RANGE = 96, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_FLOOR_MOD + BuiltinOperator_MAX = BuiltinOperator_RANGE }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[95] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[96] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -505,7 +509,8 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[95] { BuiltinOperator_SQUARE, BuiltinOperator_ZEROS_LIKE, BuiltinOperator_FILL, - BuiltinOperator_FLOOR_MOD + BuiltinOperator_FLOOR_MOD, + BuiltinOperator_RANGE }; return values; } @@ -608,6 +613,7 @@ inline const char * const *EnumNamesBuiltinOperator() { "ZEROS_LIKE", 
"FILL", "FLOOR_MOD", + "RANGE", nullptr }; return names; @@ -692,11 +698,12 @@ enum BuiltinOptions { BuiltinOptions_BidirectionalSequenceRNNOptions = 70, BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71, BuiltinOptions_FloorModOptions = 72, + BuiltinOptions_RangeOptions = 73, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_FloorModOptions + BuiltinOptions_MAX = BuiltinOptions_RangeOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[73] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[74] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -770,7 +777,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[73] { BuiltinOptions_BidirectionalSequenceLSTMOptions, BuiltinOptions_BidirectionalSequenceRNNOptions, BuiltinOptions_UnidirectionalSequenceLSTMOptions, - BuiltinOptions_FloorModOptions + BuiltinOptions_FloorModOptions, + BuiltinOptions_RangeOptions }; return values; } @@ -850,6 +858,7 @@ inline const char * const *EnumNamesBuiltinOptions() { "BidirectionalSequenceRNNOptions", "UnidirectionalSequenceLSTMOptions", "FloorModOptions", + "RangeOptions", nullptr }; return names; @@ -1152,6 +1161,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1759,6 +1772,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_FloorModOptions ? reinterpret_cast(value) : nullptr; } + RangeOptionsT *AsRangeOptions() { + return type == BuiltinOptions_RangeOptions ? + reinterpret_cast(value) : nullptr; + } + const RangeOptionsT *AsRangeOptions() const { + return type == BuiltinOptions_RangeOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -6302,6 +6323,46 @@ inline flatbuffers::Offset CreateFloorModOptions( flatbuffers::Offset CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct RangeOptionsT : public flatbuffers::NativeTable { + typedef RangeOptions TableType; + RangeOptionsT() { + } +}; + +struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RangeOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + RangeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct RangeOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + RangeOptionsBuilder &operator=(const RangeOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRangeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + RangeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; @@ -6651,6 +6712,9 @@ 
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const FloorModOptions *builtin_options_as_FloorModOptions() const { return builtin_options_type() == BuiltinOptions_FloorModOptions ? static_cast(builtin_options()) : nullptr; } + const RangeOptions *builtin_options_as_RangeOptions() const { + return builtin_options_type() == BuiltinOptions_RangeOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -6970,6 +7034,10 @@ template<> inline const FloorModOptions *Operator::builtin_options_as inline const RangeOptions *Operator::builtin_options_as() const { + return builtin_options_as_RangeOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -9377,6 +9445,29 @@ inline flatbuffers::Offset CreateFloorModOptions(flatbuffers::F _fbb); } +inline RangeOptionsT *RangeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new RangeOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void RangeOptions::UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset RangeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRangeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RangeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateRangeOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { 
auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -9854,6 +9945,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -10160,6 +10255,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -10454,6 +10553,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateFloorModOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(value); + return CreateRangeOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -10748,6 +10851,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new FloorModOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_RangeOptions: { + value = new RangeOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -11115,6 +11222,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; -- GitLab From 96a633367ecd5ae9b31e128c2436b1a3f81b27fd Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 10 Oct 2018 20:50:21 -0700 Subject: [PATCH 263/411] Part 3/4 of the update of tf.keras to the 2.2.4 API. 
PiperOrigin-RevId: 216639755 --- tensorflow/python/keras/BUILD | 13 + .../python/keras/engine/training_generator.py | 19 +- .../keras/engine/training_generator_test.py | 307 +++++++++++++ .../python/keras/engine/training_test.py | 275 ----------- tensorflow/python/keras/utils/data_utils.py | 428 ++++++++---------- .../python/keras/utils/data_utils_test.py | 4 +- ...flow.keras.utils.-generator-enqueuer.pbtxt | 2 +- ...rflow.keras.utils.-sequence-enqueuer.pbtxt | 1 + ...flow.keras.utils.-generator-enqueuer.pbtxt | 2 +- ...rflow.keras.utils.-sequence-enqueuer.pbtxt | 1 + tensorflow/tools/ci_build/ci_sanity.sh | 3 +- 11 files changed, 517 insertions(+), 538 deletions(-) create mode 100644 tensorflow/python/keras/engine/training_generator_test.py diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index c4d23f117f..a566c9acab 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -718,6 +718,19 @@ py_test( ], ) +py_test( + name = "training_generator_test", + size = "enormous", + srcs = ["engine/training_generator_test.py"], + srcs_version = "PY2AND3", + tags = ["notsan"], + deps = [ + ":keras", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + py_test( name = "feature_columns_integration_test", size = "small", diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py index 2e074699da..21f44423ec 100644 --- a/tensorflow/python/keras/engine/training_generator.py +++ b/tensorflow/python/keras/engine/training_generator.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.keras import callbacks as cbks from tensorflow.python.keras.utils.data_utils import GeneratorEnqueuer +from tensorflow.python.keras.utils.data_utils import iter_sequence_infinite from tensorflow.python.keras.utils.data_utils import OrderedEnqueuer from tensorflow.python.keras.utils.data_utils import Sequence from 
tensorflow.python.keras.utils.generic_utils import Progbar @@ -45,7 +46,6 @@ def fit_generator(model, shuffle=True, initial_epoch=0): """See docstring for `Model.fit_generator`.""" - wait_time = 0.01 # in seconds epoch = initial_epoch do_validation = bool(validation_data) @@ -124,13 +124,12 @@ def fit_generator(model, else: enqueuer = GeneratorEnqueuer( generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) + use_multiprocessing=use_multiprocessing) enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() else: if is_sequence: - output_generator = iter(generator) + output_generator = iter_sequence_infinite(generator) else: output_generator = generator @@ -251,7 +250,6 @@ def evaluate_generator(model, stateful_metric_indices = [] steps_done = 0 - wait_time = 0.01 all_outs = [] batch_sizes = [] is_sequence = isinstance(generator, Sequence) @@ -279,13 +277,12 @@ def evaluate_generator(model, else: enqueuer = GeneratorEnqueuer( generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) + use_multiprocessing=use_multiprocessing) enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() else: if is_sequence: - output_generator = iter(generator) + output_generator = iter_sequence_infinite(generator) else: output_generator = generator @@ -354,7 +351,6 @@ def predict_generator(model, model._make_test_function() steps_done = 0 - wait_time = 0.01 all_outs = [] is_sequence = isinstance(generator, Sequence) if not is_sequence and use_multiprocessing and workers > 1: @@ -381,13 +377,12 @@ def predict_generator(model, else: enqueuer = GeneratorEnqueuer( generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) + use_multiprocessing=use_multiprocessing) enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() else: if is_sequence: - output_generator = iter(generator) + output_generator = 
iter_sequence_infinite(generator) else: output_generator = generator diff --git a/tensorflow/python/keras/engine/training_generator_test.py b/tensorflow/python/keras/engine/training_generator_test.py new file mode 100644 index 0000000000..88e8943424 --- /dev/null +++ b/tensorflow/python/keras/engine/training_generator_test.py @@ -0,0 +1,307 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for training routines.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import unittest + +import numpy as np + +from tensorflow.python import keras +from tensorflow.python.framework import test_util as tf_test_util +from tensorflow.python.keras import metrics as metrics_module +from tensorflow.python.platform import test +from tensorflow.python.training.rmsprop import RMSPropOptimizer + + +class TestGeneratorMethods(test.TestCase): + + @unittest.skipIf( + os.name == 'nt', + 'use_multiprocessing=True does not work on windows properly.') + def test_generator_methods(self): + arr_data = np.random.random((50, 2)) + arr_labels = np.random.random((50,)) + + def custom_generator(): + batch_size = 10 + num_samples = 50 + while True: + batch_index = np.random.randint(0, num_samples - batch_size) + start = batch_index + end = start + batch_size + x = 
arr_data[start: end] + y = arr_labels[start: end] + yield x, y + + with self.cached_session(): + x = keras.Input((2,)) + y = keras.layers.Dense(1)(x) + fn_model = keras.models.Model(x, y) + fn_model.compile( + loss='mse', + optimizer='sgd', + metrics=['mae', metrics_module.CategoricalAccuracy()]) + + seq_model = keras.models.Sequential() + seq_model.add(keras.layers.Dense(1, input_shape=(2,))) + seq_model.compile(loss='mse', optimizer='sgd') + + for model in [fn_model, seq_model]: + model.fit_generator(custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + workers=4, + use_multiprocessing=True) + model.fit_generator(custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False) + model.fit_generator(custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + validation_data=custom_generator(), + validation_steps=10) + model.fit_generator(custom_generator(), + steps_per_epoch=5, + validation_data=custom_generator(), + validation_steps=1, + workers=0) + model.predict_generator(custom_generator(), + steps=5, + max_queue_size=10, + workers=2, + use_multiprocessing=True) + model.predict_generator(custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False) + model.predict_generator(custom_generator(), + steps=5, + max_queue_size=10, + workers=0) + model.evaluate_generator(custom_generator(), + steps=5, + max_queue_size=10, + workers=2, + verbose=1, + use_multiprocessing=True) + model.evaluate_generator(custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False) + model.evaluate_generator(custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False, + workers=0) + + def test_generator_methods_with_sample_weights(self): + arr_data = np.random.random((50, 2)) + arr_labels = np.random.random((50,)) + arr_sample_weights = np.random.random((50,)) + + def custom_generator(): 
+ batch_size = 10 + num_samples = 50 + while True: + batch_index = np.random.randint(0, num_samples - batch_size) + start = batch_index + end = start + batch_size + x = arr_data[start: end] + y = arr_labels[start: end] + w = arr_sample_weights[start: end] + yield x, y, w + + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1, input_shape=(2,))) + model.compile( + loss='mse', + optimizer='sgd', + metrics=['mae', metrics_module.CategoricalAccuracy()]) + + model.fit_generator(custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False) + model.fit_generator(custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + validation_data=custom_generator(), + validation_steps=10) + model.predict_generator(custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False) + model.evaluate_generator(custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False) + + def test_generator_methods_invalid_use_case(self): + + def custom_generator(): + while 1: + yield 0 + + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1, input_shape=(2,))) + model.compile(loss='mse', optimizer='sgd') + + with self.assertRaises(ValueError): + model.fit_generator(custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False) + with self.assertRaises(ValueError): + model.fit_generator(custom_generator(), + steps_per_epoch=5, + epochs=1, + verbose=1, + max_queue_size=10, + use_multiprocessing=False, + validation_data=custom_generator(), + validation_steps=10) + with self.assertRaises(AttributeError): + model.predict_generator(custom_generator(), + steps=5, + max_queue_size=10, + use_multiprocessing=False) + with self.assertRaises(ValueError): + model.evaluate_generator(custom_generator(), + steps=5, + 
max_queue_size=10, + use_multiprocessing=False) + + def test_training_with_sequences(self): + + class DummySequence(keras.utils.Sequence): + + def __getitem__(self, idx): + return np.zeros([10, 2]), np.ones([10]) + + def __len__(self): + return 10 + + arr_data = np.random.random((50, 2)) + arr_labels = np.random.random((50,)) + arr_sample_weights = np.random.random((50,)) + + def custom_generator(): + batch_size = 10 + num_samples = 50 + while True: + batch_index = np.random.randint(0, num_samples - batch_size) + start = batch_index + end = start + batch_size + x = arr_data[start: end] + y = arr_labels[start: end] + w = arr_sample_weights[start: end] + yield x, y, w + + with self.cached_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1, input_shape=(2,))) + model.compile(loss='mse', optimizer='sgd') + + model.fit_generator(DummySequence(), + steps_per_epoch=10, + validation_data=custom_generator(), + validation_steps=1, + max_queue_size=10, + workers=0, + use_multiprocessing=True) + model.fit_generator(DummySequence(), + steps_per_epoch=10, + validation_data=custom_generator(), + validation_steps=1, + max_queue_size=10, + workers=0, + use_multiprocessing=False) + + @tf_test_util.run_in_graph_and_eager_modes + def test_generator_input_to_fit_eval_predict(self): + val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + def custom_generator(): + while True: + yield np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + inputs = keras.layers.Input(shape=(10,)) + x = keras.layers.Dense(10, activation='relu')(inputs) + outputs = keras.layers.Dense(1, activation='sigmoid')(x) + model = keras.Model(inputs, outputs) + + model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy') + model.fit( + custom_generator(), + steps_per_epoch=2, + validation_data=val_data, + epochs=2) + model.evaluate(custom_generator(), steps=2) + model.predict(custom_generator(), steps=2) + + @tf_test_util.run_in_graph_and_eager_modes + def 
test_sequence_input_to_fit_eval_predict(self): + val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + class CustomSequence(keras.utils.Sequence): + + def __getitem__(self, idx): + return np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + def __len__(self): + return 2 + + inputs = keras.layers.Input(shape=(10,)) + x = keras.layers.Dense(10, activation='relu')(inputs) + outputs = keras.layers.Dense(1, activation='sigmoid')(x) + model = keras.Model(inputs, outputs) + + model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy') + model.fit(CustomSequence(), validation_data=val_data, epochs=2) + model.evaluate(CustomSequence()) + model.predict(CustomSequence()) + + with self.assertRaisesRegexp(ValueError, '`y` argument is not supported'): + model.fit(CustomSequence(), y=np.ones([10, 1])) + + with self.assertRaisesRegexp(ValueError, + '`sample_weight` argument is not supported'): + model.fit(CustomSequence(), sample_weight=np.ones([10, 1])) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 868fd1dc69..bd6b0e1aa1 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -19,8 +19,6 @@ from __future__ import division from __future__ import print_function import logging -import os -import unittest import numpy as np @@ -1102,279 +1100,6 @@ class TestDynamicTrainability(test.TestCase): self.assertListEqual(outer_model.trainable_weights, []) -class TestGeneratorMethods(test.TestCase): - - @unittest.skipIf( - os.name == 'nt', - 'use_multiprocessing=True does not work on windows properly.') - def test_generator_methods(self): - arr_data = np.random.random((50, 2)) - arr_labels = np.random.random((50,)) - - def custom_generator(): - batch_size = 10 - num_samples = 50 - while True: - batch_index = np.random.randint(0, num_samples - batch_size) - start = batch_index - end = 
start + batch_size - x = arr_data[start: end] - y = arr_labels[start: end] - yield x, y - - with self.cached_session(): - x = keras.Input((2,)) - y = keras.layers.Dense(1)(x) - fn_model = keras.models.Model(x, y) - fn_model.compile( - loss='mse', - optimizer='sgd', - metrics=['mae', metrics_module.CategoricalAccuracy()]) - - seq_model = keras.models.Sequential() - seq_model.add(keras.layers.Dense(1, input_shape=(2,))) - seq_model.compile(loss='mse', optimizer='sgd') - - for model in [fn_model, seq_model]: - model.fit_generator(custom_generator(), - steps_per_epoch=5, - epochs=1, - verbose=1, - max_queue_size=10, - workers=4, - use_multiprocessing=True) - model.fit_generator(custom_generator(), - steps_per_epoch=5, - epochs=1, - verbose=1, - max_queue_size=10, - use_multiprocessing=False) - model.fit_generator(custom_generator(), - steps_per_epoch=5, - epochs=1, - verbose=1, - max_queue_size=10, - use_multiprocessing=False, - validation_data=custom_generator(), - validation_steps=10) - model.fit_generator(custom_generator(), - steps_per_epoch=5, - validation_data=custom_generator(), - validation_steps=1, - workers=0) - model.predict_generator(custom_generator(), - steps=5, - max_queue_size=10, - workers=2, - use_multiprocessing=True) - model.predict_generator(custom_generator(), - steps=5, - max_queue_size=10, - use_multiprocessing=False) - model.predict_generator(custom_generator(), - steps=5, - max_queue_size=10, - workers=0) - model.evaluate_generator(custom_generator(), - steps=5, - max_queue_size=10, - workers=2, - verbose=1, - use_multiprocessing=True) - model.evaluate_generator(custom_generator(), - steps=5, - max_queue_size=10, - use_multiprocessing=False) - model.evaluate_generator(custom_generator(), - steps=5, - max_queue_size=10, - use_multiprocessing=False, - workers=0) - - def test_generator_methods_with_sample_weights(self): - arr_data = np.random.random((50, 2)) - arr_labels = np.random.random((50,)) - arr_sample_weights = np.random.random((50,)) - - 
def custom_generator(): - batch_size = 10 - num_samples = 50 - while True: - batch_index = np.random.randint(0, num_samples - batch_size) - start = batch_index - end = start + batch_size - x = arr_data[start: end] - y = arr_labels[start: end] - w = arr_sample_weights[start: end] - yield x, y, w - - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(1, input_shape=(2,))) - model.compile( - loss='mse', - optimizer='sgd', - metrics=['mae', metrics_module.CategoricalAccuracy()]) - - model.fit_generator(custom_generator(), - steps_per_epoch=5, - epochs=1, - verbose=1, - max_queue_size=10, - use_multiprocessing=False) - model.fit_generator(custom_generator(), - steps_per_epoch=5, - epochs=1, - verbose=1, - max_queue_size=10, - use_multiprocessing=False, - validation_data=custom_generator(), - validation_steps=10) - model.predict_generator(custom_generator(), - steps=5, - max_queue_size=10, - use_multiprocessing=False) - model.evaluate_generator(custom_generator(), - steps=5, - max_queue_size=10, - use_multiprocessing=False) - - def test_generator_methods_invalid_use_case(self): - - def custom_generator(): - while 1: - yield 0 - - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(1, input_shape=(2,))) - model.compile(loss='mse', optimizer='sgd') - - with self.assertRaises(ValueError): - model.fit_generator(custom_generator(), - steps_per_epoch=5, - epochs=1, - verbose=1, - max_queue_size=10, - use_multiprocessing=False) - with self.assertRaises(ValueError): - model.fit_generator(custom_generator(), - steps_per_epoch=5, - epochs=1, - verbose=1, - max_queue_size=10, - use_multiprocessing=False, - validation_data=custom_generator(), - validation_steps=10) - with self.assertRaises(AttributeError): - model.predict_generator(custom_generator(), - steps=5, - max_queue_size=10, - use_multiprocessing=False) - with self.assertRaises(ValueError): - model.evaluate_generator(custom_generator(), - 
steps=5, - max_queue_size=10, - use_multiprocessing=False) - - def test_training_with_sequences(self): - - class DummySequence(keras.utils.Sequence): - - def __getitem__(self, idx): - return np.zeros([10, 2]), np.ones([10]) - - def __len__(self): - return 10 - - arr_data = np.random.random((50, 2)) - arr_labels = np.random.random((50,)) - arr_sample_weights = np.random.random((50,)) - - def custom_generator(): - batch_size = 10 - num_samples = 50 - while True: - batch_index = np.random.randint(0, num_samples - batch_size) - start = batch_index - end = start + batch_size - x = arr_data[start: end] - y = arr_labels[start: end] - w = arr_sample_weights[start: end] - yield x, y, w - - with self.cached_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(1, input_shape=(2,))) - model.compile(loss='mse', optimizer='sgd') - - model.fit_generator(DummySequence(), - steps_per_epoch=10, - validation_data=custom_generator(), - validation_steps=1, - max_queue_size=10, - workers=0, - use_multiprocessing=True) - model.fit_generator(DummySequence(), - steps_per_epoch=10, - validation_data=custom_generator(), - validation_steps=1, - max_queue_size=10, - workers=0, - use_multiprocessing=False) - - @tf_test_util.run_in_graph_and_eager_modes - def test_generator_input_to_fit_eval_predict(self): - val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) - - def custom_generator(): - while True: - yield np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) - - inputs = keras.layers.Input(shape=(10,)) - x = keras.layers.Dense(10, activation='relu')(inputs) - outputs = keras.layers.Dense(1, activation='sigmoid')(x) - model = keras.Model(inputs, outputs) - - model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy') - model.fit( - custom_generator(), - steps_per_epoch=2, - validation_data=val_data, - epochs=2) - model.evaluate(custom_generator(), steps=2) - model.predict(custom_generator(), steps=2) - - 
@tf_test_util.run_in_graph_and_eager_modes - def test_sequence_input_to_fit_eval_predict(self): - val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) - - class CustomSequence(keras.utils.Sequence): - - def __getitem__(self, idx): - return np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) - - def __len__(self): - return 2 - - inputs = keras.layers.Input(shape=(10,)) - x = keras.layers.Dense(10, activation='relu')(inputs) - outputs = keras.layers.Dense(1, activation='sigmoid')(x) - model = keras.Model(inputs, outputs) - - model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy') - model.fit(CustomSequence(), validation_data=val_data, epochs=2) - model.evaluate(CustomSequence()) - model.predict(CustomSequence()) - - with self.assertRaisesRegexp(ValueError, '`y` argument is not supported'): - model.fit(CustomSequence(), y=np.ones([10, 1])) - - with self.assertRaisesRegexp(ValueError, - '`sample_weight` argument is not supported'): - model.fit(CustomSequence(), sample_weight=np.ones([10, 1])) - - class TestTrainingUtils(test.TestCase): def test_check_array_lengths(self): diff --git a/tensorflow/python/keras/utils/data_utils.py b/tensorflow/python/keras/utils/data_utils.py index b736daa46d..01a9d61a84 100644 --- a/tensorflow/python/keras/utils/data_utils.py +++ b/tensorflow/python/keras/utils/data_utils.py @@ -30,7 +30,6 @@ import sys import tarfile import threading import time -import traceback import zipfile import numpy as np @@ -117,16 +116,16 @@ def _extract_archive(file_path, path='.', archive_format='auto'): """ if archive_format is None: return False - if archive_format is 'auto': + if archive_format == 'auto': archive_format = ['tar', 'zip'] if isinstance(archive_format, six.string_types): archive_format = [archive_format] for archive_type in archive_format: - if archive_type is 'tar': + if archive_type == 'tar': open_fn = tarfile.open is_match_fn = tarfile.is_tarfile - if archive_type is 'zip': + if archive_type == 'zip': 
open_fn = zipfile.ZipFile is_match_fn = zipfile.is_zipfile @@ -237,7 +236,7 @@ def get_file(fname, def dl_progress(count, block_size, total_size): if ProgressTracker.progbar is None: - if total_size is -1: + if total_size == -1: total_size = None ProgressTracker.progbar = Progbar(total_size) else: @@ -288,7 +287,7 @@ def _hash_file(fpath, algorithm='sha256', chunk_size=65535): Returns: The file hash """ - if (algorithm is 'sha256') or (algorithm is 'auto' and len(hash) is 64): + if (algorithm == 'sha256') or (algorithm == 'auto' and len(hash) == 64): hasher = hashlib.sha256() else: hasher = hashlib.md5() @@ -314,8 +313,7 @@ def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535): Returns: Whether the file is valid """ - if ((algorithm is 'sha256') or - (algorithm is 'auto' and len(file_hash) is 64)): + if (algorithm == 'sha256') or (algorithm == 'auto' and len(file_hash) == 64): hasher = 'sha256' else: hasher = 'md5' @@ -400,14 +398,23 @@ class Sequence(object): pass def __iter__(self): - """Creates an infinite generator that iterate over the Sequence. + """Create a generator that iterate over the Sequence.""" + for item in (self[i] for i in range(len(self))): + yield item - Yields: - Sequence items. - """ - while True: - for item in (self[i] for i in range(len(self))): - yield item + +def iter_sequence_infinite(seq): + """Iterates indefinitely over a Sequence. + + Arguments: + seq: Sequence instance. + + Yields: + Batches of data from the Sequence. + """ + while True: + for item in seq: + yield item # Global variables to be shared across processes @@ -445,7 +452,7 @@ class SequenceEnqueuer(object): The task of an Enqueuer is to use parallelism to speed up preprocessing. This is done with processes or threads. - Examples: + Example: ```python enqueuer = SequenceEnqueuer(...) @@ -458,61 +465,10 @@ class SequenceEnqueuer(object): ``` The `enqueuer.get()` should be an infinite stream of datas. 
- """ - @abstractmethod - def is_running(self): - raise NotImplementedError - - @abstractmethod - def start(self, workers=1, max_queue_size=10): - """Starts the handler's workers. - - Arguments: - workers: number of worker threads - max_queue_size: queue size - (when full, threads could block on `put()`). - """ - raise NotImplementedError - - @abstractmethod - def stop(self, timeout=None): - """Stop running threads and wait for them to exit, if necessary. - - Should be called by the same thread which called start(). - - Arguments: - timeout: maximum time to wait on thread.join() - """ - raise NotImplementedError - - @abstractmethod - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - - Returns: - Generator yielding tuples `(inputs, targets)` - or `(inputs, targets, sample_weights)`. - """ - raise NotImplementedError - - -@tf_export('keras.utils.OrderedEnqueuer') -class OrderedEnqueuer(SequenceEnqueuer): - """Builds a Enqueuer from a Sequence. - - Used in `fit_generator`, `evaluate_generator`, `predict_generator`. - - Arguments: - sequence: A `keras.utils.data_utils.Sequence` object. - use_multiprocessing: use multiprocessing if True, otherwise threading - shuffle: whether to shuffle the data at the beginning of each epoch - """ - - def __init__(self, sequence, use_multiprocessing=False, shuffle=False): + def __init__(self, sequence, + use_multiprocessing=False): self.sequence = sequence self.use_multiprocessing = use_multiprocessing @@ -535,7 +491,6 @@ class OrderedEnqueuer(SequenceEnqueuer): self.uid = _SEQUENCE_COUNTER.value _SEQUENCE_COUNTER.value += 1 - self.shuffle = shuffle self.workers = 0 self.executor_fn = None self.queue = None @@ -546,16 +501,15 @@ class OrderedEnqueuer(SequenceEnqueuer): return self.stop_signal is not None and not self.stop_signal.is_set() def start(self, workers=1, max_queue_size=10): - """Start the handler's workers. + """Starts the handler's workers. 
Arguments: - workers: number of worker threads + workers: Number of workers. max_queue_size: queue size (when full, workers could block on `put()`) """ if self.use_multiprocessing: - self.executor_fn = lambda seqs: multiprocessing.Pool( # pylint: disable=g-long-lambda - workers, initializer=init_pool, initargs=(seqs,)) + self.executor_fn = self._get_executor_init(workers) else: # We do not need the init since it's threads. self.executor_fn = lambda _: ThreadPool(workers) @@ -566,6 +520,87 @@ class OrderedEnqueuer(SequenceEnqueuer): self.run_thread.daemon = True self.run_thread.start() + def _send_sequence(self): + """Sends current Iterable to all workers.""" + # For new processes that may spawn + _SHARED_SEQUENCES[self.uid] = self.sequence + + def stop(self, timeout=None): + """Stops running threads and wait for them to exit, if necessary. + + Should be called by the same thread which called `start()`. + + Arguments: + timeout: maximum time to wait on `thread.join()` + """ + self.stop_signal.set() + with self.queue.mutex: + self.queue.queue.clear() + self.queue.unfinished_tasks = 0 + self.queue.not_full.notify() + self.run_thread.join(timeout) + _SHARED_SEQUENCES[self.uid] = None + + @abstractmethod + def _run(self): + """Submits request to the executor and queue the `Future` objects.""" + raise NotImplementedError + + @abstractmethod + def _get_executor_init(self, workers): + """Gets the Pool initializer for multiprocessing. + + Arguments: + workers: Number of workers. + + Returns: + Function, a Function to initialize the pool + """ + raise NotImplementedError + + @abstractmethod + def get(self): + """Creates a generator to extract data from the queue. + + Skip the data if it is `None`. + # Returns + Generator yielding tuples `(inputs, targets)` + or `(inputs, targets, sample_weights)`. + """ + raise NotImplementedError + + +@tf_export('keras.utils.OrderedEnqueuer') +class OrderedEnqueuer(SequenceEnqueuer): + """Builds a Enqueuer from a Sequence. 
+ + Used in `fit_generator`, `evaluate_generator`, `predict_generator`. + + Arguments: + sequence: A `tf.keras.utils.data_utils.Sequence` object. + use_multiprocessing: use multiprocessing if True, otherwise threading + shuffle: whether to shuffle the data at the beginning of each epoch + """ + + def __init__(self, sequence, use_multiprocessing=False, shuffle=False): + super(OrderedEnqueuer, self).__init__(sequence, use_multiprocessing) + self.shuffle = shuffle + + def _get_executor_init(self, workers): + """Gets the Pool initializer for multiprocessing. + + Arguments: + workers: Number of workers. + + Returns: + Function, a Function to initialize the pool + """ + def pool_fn(seqs): + return multiprocessing.Pool(workers, + initializer=init_pool_generator, + initargs=(seqs, self.random_seed)) + return pool_fn + def _wait_queue(self): """Wait for the queue to be empty.""" while True: @@ -615,30 +650,34 @@ class OrderedEnqueuer(SequenceEnqueuer): self.queue.task_done() if inputs is not None: yield inputs - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except self.stop() - six.raise_from(StopIteration(e), e) + six.reraise(*sys.exc_info()) - def _send_sequence(self): - """Send current Sequence to all workers.""" - # For new processes that may spawn - _SHARED_SEQUENCES[self.uid] = self.sequence - def stop(self, timeout=None): - """Stops running threads and wait for them to exit, if necessary. +def init_pool_generator(gens, random_seed=None): + global _SHARED_SEQUENCES + _SHARED_SEQUENCES = gens - Should be called by the same thread which called `start()`. 
+ if random_seed is not None: + ident = multiprocessing.current_process().ident + np.random.seed(random_seed + ident) - Arguments: - timeout: maximum time to wait on `thread.join()` - """ - self.stop_signal.set() - with self.queue.mutex: - self.queue.queue.clear() - self.queue.unfinished_tasks = 0 - self.queue.not_full.notify() - self.run_thread.join(timeout) - _SHARED_SEQUENCES[self.uid] = None + +def next_sample(uid): + """Gets the next value from the generator `uid`. + + To allow multiple generators to be used at the same time, we use `uid` to + get a specific one. A single generator would cause the validation to + overwrite the training generator. + + Arguments: + uid: int, generator identifier + + Returns: + The next value of generator `uid`. + """ + return six.next(_SHARED_SEQUENCES[uid]) @tf_export('keras.utils.GeneratorEnqueuer') @@ -658,145 +697,36 @@ class GeneratorEnqueuer(SequenceEnqueuer): will be incremented by one for each worker. """ - def __init__(self, - generator, + def __init__(self, sequence, use_multiprocessing=False, - wait_time=0.05, - seed=None): - self.wait_time = wait_time - self._generator = generator - if os.name is 'nt' and use_multiprocessing is True: - # On Windows, avoid **SYSTEMATIC** error in `multiprocessing`: - # `TypeError: can't pickle generator objects` - # => Suggest multithreading instead of multiprocessing on Windows - raise ValueError('Using a generator with `use_multiprocessing=True`' - ' is not supported on Windows (no marshalling of' - ' generators across process boundaries). 
Instead,' - ' use single thread/process or multithreading.') - else: - self._use_multiprocessing = use_multiprocessing - self._threads = [] - self._stop_event = None - self._manager = None - self.queue = None - self.seed = seed - - def _data_generator_task(self): - if self._use_multiprocessing is False: - while not self._stop_event.is_set(): - with self.genlock: - try: - if (self.queue is not None and - self.queue.qsize() < self.max_queue_size): - # On all OSes, avoid **SYSTEMATIC** error - # in multithreading mode: - # `ValueError: generator already executing` - # => Serialize calls to - # infinite iterator/generator's next() function - generator_output = next(self._generator) - self.queue.put((True, generator_output)) - else: - time.sleep(self.wait_time) - except StopIteration: - break - except Exception as e: # pylint: disable=broad-except - # Can't pickle tracebacks. - # As a compromise, print the traceback and pickle None instead. - if not hasattr(e, '__traceback__'): - setattr(e, '__traceback__', sys.exc_info()[2]) - self.queue.put((False, e)) - self._stop_event.set() - break - else: - while not self._stop_event.is_set(): - try: - if (self.queue is not None and - self.queue.qsize() < self.max_queue_size): - generator_output = next(self._generator) - self.queue.put((True, generator_output)) - else: - time.sleep(self.wait_time) - except StopIteration: - break - except Exception as e: # pylint: disable=broad-except - # Can't pickle tracebacks. - # As a compromise, print the traceback and pickle None instead. - traceback.print_exc() - setattr(e, '__traceback__', None) - self.queue.put((False, e)) - self._stop_event.set() - break + random_seed=None): + super(GeneratorEnqueuer, self).__init__(sequence, use_multiprocessing) + self.random_seed = random_seed - def start(self, workers=1, max_queue_size=10): - """Kicks off threads which add data from the generator into the queue. 
+ def _get_executor_init(self, workers): + """Gets the Pool initializer for multiprocessing. Arguments: - workers: number of worker threads - max_queue_size: queue size - (when full, threads could block on `put()`) - """ - try: - self.max_queue_size = max_queue_size - if self._use_multiprocessing: - self._manager = multiprocessing.Manager() - self.queue = self._manager.Queue(maxsize=max_queue_size) - self._stop_event = multiprocessing.Event() - else: - # On all OSes, avoid **SYSTEMATIC** error in multithreading mode: - # `ValueError: generator already executing` - # => Serialize calls to infinite iterator/generator's next() function - self.genlock = threading.Lock() - self.queue = queue.Queue(maxsize=max_queue_size) - self._stop_event = threading.Event() - - for _ in range(workers): - if self._use_multiprocessing: - # Reset random seed else all children processes - # share the same seed - np.random.seed(self.seed) - thread = multiprocessing.Process(target=self._data_generator_task) - thread.daemon = True - if self.seed is not None: - self.seed += 1 - else: - thread = threading.Thread(target=self._data_generator_task) - self._threads.append(thread) - thread.start() - except: - self.stop() - raise - - def is_running(self): - return self._stop_event is not None and not self._stop_event.is_set() - - def stop(self, timeout=None): - """Stops running threads and wait for them to exit, if necessary. - - Should be called by the same thread which called `start()`. + workers: Number of works. - Arguments: - timeout: maximum time to wait on `thread.join()`. 
+ Returns: + A Function to initialize the pool """ - if self.is_running(): - self._stop_event.set() - - for thread in self._threads: - if self._use_multiprocessing: - if thread.is_alive(): - thread.terminate() - else: - # The thread.is_alive() test is subject to a race condition: - # the thread could terminate right after the test and before the - # join, rendering this test meaningless -> Call thread.join() - # always, which is ok no matter what the status of the thread. - thread.join(timeout) - - if self._manager: - self._manager.shutdown() + def pool_fn(seqs): + return multiprocessing.Pool(workers, + initializer=init_pool_generator, + initargs=(seqs, self.random_seed)) + return pool_fn - self._threads = [] - self._stop_event = None - self.queue = None + def _run(self): + """Submits request to the executor and queue the `Future` objects.""" + self._send_sequence() # Share the initial generator + with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: + while True: + if self.stop_signal.is_set(): + return + self.queue.put( + executor.apply_async(next_sample, (self.uid,)), block=True) def get(self): """Creates a generator to extract data from the queue. @@ -808,24 +738,30 @@ class GeneratorEnqueuer(SequenceEnqueuer): `(inputs, targets)` or `(inputs, targets, sample_weights)`. 
""" - while self.is_running(): - if not self.queue.empty(): - success, value = self.queue.get() - # Rethrow any exceptions found in the queue - if not success: - six.reraise(value.__class__, value, value.__traceback__) - # Yield regular values - if value is not None: - yield value - else: - all_finished = all([not thread.is_alive() for thread in self._threads]) - if all_finished and self.queue.empty(): - raise StopIteration() - else: - time.sleep(self.wait_time) - - # Make sure to rethrow the first exception in the queue, if any - while not self.queue.empty(): - success, value = self.queue.get() - if not success: - six.reraise(value.__class__, value, value.__traceback__) + try: + while self.is_running(): + inputs = self.queue.get(block=True).get() + self.queue.task_done() + if inputs is not None: + yield inputs + except StopIteration: + # Special case for finite generators + last_ones = [] + while self.queue.qsize() > 0: + last_ones.append(self.queue.get(block=True)) + # Wait for them to complete + for f in last_ones: + f.wait() + # Keep the good ones + last_ones = [future.get() for future in last_ones if future.successful()] + for inputs in last_ones: + if inputs is not None: + yield inputs + except Exception as e: # pylint: disable=broad-except + self.stop() + if 'generator already executing' in str(e): + raise RuntimeError( + 'Your generator is NOT thread-safe. ' + 'Keras requires a thread-safe generator when ' + '`use_multiprocessing=False, workers > 1`. 
') + six.reraise(*sys.exc_info()) diff --git a/tensorflow/python/keras/utils/data_utils_test.py b/tensorflow/python/keras/utils/data_utils_test.py index 395df7e0e7..cc95803d6d 100644 --- a/tensorflow/python/keras/utils/data_utils_test.py +++ b/tensorflow/python/keras/utils/data_utils_test.py @@ -228,7 +228,7 @@ class TestEnqueuers(test.TestCase): FaultSequence(), use_multiprocessing=False) enqueuer.start(3, 10) gen_output = enqueuer.get() - with self.assertRaises(StopIteration): + with self.assertRaises(IndexError): next(gen_output) def test_ordered_enqueuer_fail_processes(self): @@ -236,7 +236,7 @@ class TestEnqueuers(test.TestCase): FaultSequence(), use_multiprocessing=True) enqueuer.start(3, 10) gen_output = enqueuer.get() - with self.assertRaises(StopIteration): + with self.assertRaises(IndexError): next(gen_output) def test_on_epoch_end_processes(self): diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-generator-enqueuer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-generator-enqueuer.pbtxt index 939fd547d0..6f5ad2dc96 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-generator-enqueuer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-generator-enqueuer.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'generator\', \'use_multiprocessing\', \'wait_time\', \'seed\'], varargs=None, keywords=None, defaults=[\'False\', \'0.05\', \'None\'], " + argspec: "args=[\'self\', \'sequence\', \'use_multiprocessing\', \'random_seed\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " } member_method { name: "get" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-sequence-enqueuer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-sequence-enqueuer.pbtxt index a9e499d100..aa36d66f92 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-sequence-enqueuer.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-sequence-enqueuer.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" + argspec: "args=[\'self\', \'sequence\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "get" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-generator-enqueuer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-generator-enqueuer.pbtxt index 939fd547d0..6f5ad2dc96 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-generator-enqueuer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-generator-enqueuer.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'generator\', \'use_multiprocessing\', \'wait_time\', \'seed\'], varargs=None, keywords=None, defaults=[\'False\', \'0.05\', \'None\'], " + argspec: "args=[\'self\', \'sequence\', \'use_multiprocessing\', \'random_seed\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " } member_method { name: "get" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-sequence-enqueuer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-sequence-enqueuer.pbtxt index a9e499d100..aa36d66f92 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-sequence-enqueuer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-sequence-enqueuer.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" + argspec: "args=[\'self\', \'sequence\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "get" diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index a98c15d961..503e602198 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -104,7 +104,8 @@ do_pylint() { 
"^tensorflow/python/keras/callbacks\.py.*\[E1133.*not-an-iterable "\ "^tensorflow/python/keras/engine/base_layer.py.*\[E0203.*access-member-before-definition "\ "^tensorflow/python/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition "\ -"^tensorflow/python/kernel_tests/constant_op_eager_test.py.*\[E0303.*invalid-length-returned" +"^tensorflow/python/kernel_tests/constant_op_eager_test.py.*\[E0303.*invalid-length-returned "\ +"^tensorflow/python/keras/utils/data_utils.py.*\[E1102.*not-callable" echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\"" -- GitLab From 2e75c6321fbcc1225c28de1dc1fc852bc2085881 Mon Sep 17 00:00:00 2001 From: Grzegorz Pawelczak Date: Thu, 11 Oct 2018 08:40:21 +0100 Subject: [PATCH 264/411] Query on Windows as well --- configure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.py b/configure.py index 08e3c17b18..bac689aa08 100644 --- a/configure.py +++ b/configure.py @@ -1560,7 +1560,6 @@ def main(): # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on # Windows. environ_cp['TF_DOWNLOAD_CLANG'] = '0' - environ_cp['TF_ENABLE_XLA'] = '0' environ_cp['TF_NEED_MPI'] = '0' environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0' -- GitLab From 100f9dee3115d892be1a39ae023bd80e24bf70eb Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 11 Oct 2018 02:02:24 -0700 Subject: [PATCH 265/411] compat: Update forward compatibility horizon to 2018-10-11 PiperOrigin-RevId: 216663386 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index b7a1fce586..292b9a8480 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 10) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 11) @tf_export("compat.forward_compatible") -- GitLab From 6280f4167056e77f6fa4ac986cf15ac50d75991a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 11 Oct 2018 05:53:19 -0700 Subject: [PATCH 266/411] Only file changed was the tensorcore outfeed manager test, which was passing wrong shape. Wonder how that could have ever let this CL to go in in the first place. 
Automated rollback of commit 905deeaadd41d529461d8a6666e9cf46f0097a8f PiperOrigin-RevId: 216683052 --- .../xla/service/compile_only_service.cc | 2 + .../compiler/xrt/kernels/xrt_compile_ops.cc | 19 +++- .../compiler/xrt/kernels/xrt_execute_op.cc | 8 -- .../compiler/xrt/ops/xrt_compile_ops.cc | 7 +- tensorflow/compiler/xrt/tests/BUILD | 13 ++- tensorflow/compiler/xrt/tests/raw_api_test.cc | 106 +++++++++++++++++- 6 files changed, 135 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index 96bd2616f5..bd5045b9b9 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -89,6 +89,8 @@ CompileOnlyService::CompileAheadOfTime( const auto& program_shape = instance.computation.program_shape(); ExecutionOptions execution_options; *execution_options.mutable_debug_options() = debug_options; + *execution_options.mutable_shape_with_output_layout() = + *instance.result_layout; TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, CreateModuleConfig(program_shape, instance.argument_layouts, diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc index 1d4f8d97f2..1ab836a496 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc @@ -166,10 +166,21 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) { VLOG(1) << "Compiling XLA executable"; return Compile(ctx, computation_proto, program); })); - - Tensor output(DT_INT64, TensorShape({})); - output.scalar()() = uid; - ctx->set_output(0, output); + std::unique_ptr entry; + OP_REQUIRES_OK(ctx, cache->Lookup(uid, &entry)); + + Tensor handle_output(DT_INT64, TensorShape({})); + handle_output.scalar()() = uid; + ctx->set_output(0, handle_output); + + xla::LocalExecutable* executable = entry->get().get_executable(); + xla::ProgramShape 
program_shape = executable->executable() + ->module() + .entry_computation() + ->ComputeProgramShape(); + Tensor program_shape_output(DT_STRING, TensorShape({1})); + program_shape_output.vec()(0) = program_shape.SerializeAsString(); + ctx->set_output(1, program_shape_output); } XRTCompileOp::~XRTCompileOp() = default; diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc index 257b054f16..3a1e03280a 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc @@ -64,14 +64,6 @@ uint32 GetXLARandomSeed() { return counter.fetch_add(2); } -// Looks up the input `key` in the compilation cache. -Status GetComputationCacheEntry( - XRTCompilationCache* cache, int64 key, - std::unique_ptr* entry) { - TF_RETURN_IF_ERROR(cache->Lookup(key, entry)); - return Status::OK(); -} - // Populates `inputs` with the input tensors to the computation. Status GetComputationInputs(OpKernelContext* context, ResourceMgr* rm, bool release_inputs, diff --git a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc index 5cfc8711f9..7b3b50c695 100644 --- a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc @@ -23,7 +23,12 @@ namespace tensorflow { REGISTER_OP("XRTCompile") .Input("computation: string") .Output("handle: int64") - .SetShapeFn(tensorflow::shape_inference::ScalarShape) + .Output("program_shape: string") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Scalar()); + c->set_output(1, c->UnknownShapeOfRank(1)); + return Status::OK(); + }) .Doc( R"( Reads a computation proto, compiles it, and places it in the global compilation diff --git a/tensorflow/compiler/xrt/tests/BUILD b/tensorflow/compiler/xrt/tests/BUILD index b6dcfc4eb9..be44a3474a 100644 --- a/tensorflow/compiler/xrt/tests/BUILD +++ b/tensorflow/compiler/xrt/tests/BUILD @@ -29,8 +29,11 @@ 
cc_library( "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_computation", + "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xrt:xrt_proto", "//tensorflow/compiler/xrt:xrt_server", "//tensorflow/compiler/xrt/cc:xrt_ops", @@ -49,7 +52,10 @@ tf_cc_test( name = "raw_api_test_cpu", size = "medium", srcs = [], - args = ["--xla_test_device=XLA_CPU"], + args = [ + "--xla_test_device=XLA_CPU", + "--xla_platform=CPU", + ], deps = [ ":raw_api_test_lib", "//tensorflow/compiler/jit:xla_cpu_device", @@ -60,7 +66,10 @@ tf_cuda_cc_test( name = "raw_api_test_gpu", size = "medium", srcs = [], - args = ["--xla_test_device=XLA_GPU"], + args = [ + "--xla_test_device=XLA_GPU", + "--xla_platform=GPU", + ], tags = tf_cuda_tests_tags(), deps = [ ":raw_api_test_lib", diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index 9fc01e6304..ee6734020d 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -22,10 +22,13 @@ limitations under the License. 
#include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h" @@ -43,6 +46,7 @@ namespace tensorflow { namespace { string* xla_test_device_ptr; // initial value set in main() +string* xla_platform_ptr; // initial value set in main() string DeviceFromFlag() { string xla_test_device = *xla_test_device_ptr; @@ -145,6 +149,28 @@ void StoreComputationSnapshot(const xla::XlaComputation& computation, *dst = *snapshot; } +xla::ProgramShape XlaCompiledProgramShape( + const xla::XlaComputation& computation, + const xla::ProgramShape& input_program_shape) { + se::Platform* platform = + xla::PlatformUtil::GetPlatform(*xla_platform_ptr).ValueOrDie(); + xla::LocalClient* client = + xla::ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie(); + xla::ExecutableBuildOptions exec_options; + exec_options.set_result_layout(input_program_shape.result()); + std::vector parameters_shapes; + for (int64 i = 0; i < input_program_shape.parameters_size(); ++i) { + parameters_shapes.push_back(&input_program_shape.parameters(i)); + } + auto local_executable = + client->Compile(computation, parameters_shapes, exec_options) + .ValueOrDie(); + return local_executable->executable() + ->module() + .entry_computation() + ->ComputeProgramShape(); +} + TEST(RawApiTest, ReadAndWriteState) { xrt::XLAAllocation alloc; alloc.set_device_ordinal(0); @@ -338,20 +364,87 @@ TEST(RawApiTest, 
CompileAndExecute) { auto p1_value = ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); auto p1_handle = ops::XRTAllocate(root, p1_value); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, {Output(p0_handle), Output(p1_handle)}); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({read_back}, &outputs)); + TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); xla::LiteralProto response; EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); auto expected = xla::LiteralUtil::CreateR1({27.0f, 21.0f}); EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); + + xla::ProgramShape program_shape; + EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec()(0))); + EXPECT_EQ(program_shape.parameters_size(), 2); +} + +TEST(RawApiTest, CompileWithXlaReturnShapes) { + xla::XlaBuilder builder("XrtXlaShapes"); + auto input_shape = xla::ShapeUtil::MakeShape(xla::BF16, {32, 3, 128, 128}); + auto kernel_shape = xla::ShapeUtil::MakeShape(xla::BF16, {3, 3, 5, 5}); + // Clear layouts to signal XLA we are ready to get whatever are coming out of + // the compilation process. + xla::LayoutUtil::ClearLayout(&input_shape); + xla::LayoutUtil::ClearLayout(&kernel_shape); + auto param_shape = + xla::ShapeUtil::MakeTupleShape({input_shape, kernel_shape}); + auto param = xla::Parameter(&builder, 0, param_shape, "param"); + auto input = xla::GetTupleElement(param, 0); + auto kernel = xla::GetTupleElement(param, 1); + xla::Conv(input, kernel, {1, 1}, xla::Padding::kSame); + TF_ASSERT_OK_AND_ASSIGN(xla::XlaComputation xla_computation, builder.Build()); + + auto result_shape = xla_computation.GetProgramShape().ValueOrDie().result(); + // Clear the result shape layout to tell XLA we are accepting whatever are + // coming out of the compilation process. 
+ xla::LayoutUtil::ClearLayout(&result_shape); + + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = param_shape; + *shapes->mutable_result() = result_shape; + StoreComputationSnapshot(xla_computation, c.mutable_hlo_snapshot()); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + auto computation = + ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto release = ops::XRTReleaseCompilationHandle(root, c_handle.handle); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run(tensorflow::ClientSession::FeedType(), + {c_handle.program_shape}, {release}, &outputs)); + + xla::ProgramShape program_shape; + EXPECT_TRUE(program_shape.ParseFromString(outputs[0].vec()(0))); + EXPECT_EQ(program_shape.parameters_size(), 1); + + VLOG(2) << "Param: " + << xla::ShapeUtil::HumanStringWithLayout(program_shape.parameters(0)); + VLOG(2) << "Result: " + << xla::ShapeUtil::HumanStringWithLayout(program_shape.result()); + + xla::ProgramShape xla_program_shape = + XlaCompiledProgramShape(xla_computation, *shapes); + EXPECT_TRUE(xla::LayoutUtil::Equal( + xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {0}).layout(), + xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {0}) + .layout())); + EXPECT_TRUE(xla::LayoutUtil::Equal( + xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {1}).layout(), + xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {1}) + .layout())); + EXPECT_TRUE(xla::LayoutUtil::Equal(program_shape.result().layout(), + xla_program_shape.result().layout())); } TEST(RawApiTest, CompileAndExecuteZeroArg) { @@ -371,7 +464,7 @@ TEST(RawApiTest, CompileAndExecuteZeroArg) { auto computation = ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); auto c_handle = ops::XRTCompile(root, 
computation); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, std::initializer_list({})); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); @@ -420,7 +513,7 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) { auto p1_value = ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); auto p1_handle = ops::XRTAllocate(root, p1_value); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, {Output(p0_handle), Output(p1_handle)}); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); @@ -455,7 +548,7 @@ TEST(RawApiTest, LeakCompilationReference) { ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({c_handle}, &outputs)); + TF_EXPECT_OK(session.Run({c_handle.handle}, &outputs)); } } // namespace @@ -464,9 +557,12 @@ TEST(RawApiTest, LeakCompilationReference) { int main(int argc, char** argv) { tensorflow::xla_test_device_ptr = new tensorflow::string("XLA_CPU"); + tensorflow::xla_platform_ptr = new tensorflow::string("CPU"); std::vector flag_list = { tensorflow::Flag("xla_test_device", tensorflow::xla_test_device_ptr, "Tensorflow device type to use for test, e.g., XLA_CPU"), + tensorflow::Flag("xla_platform", tensorflow::xla_platform_ptr, + "The XLA platform to select for the device"), }; tensorflow::string usage = tensorflow::Flags::Usage(argv[0], flag_list); const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); -- GitLab From c77588f44043fca23328d9d680fca29a50a9df48 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 11 Oct 2018 06:12:51 -0700 Subject: [PATCH 267/411] Don't use xla::Add on PRED types in tf-xla bridge for TileOp We are working on disallowing xla::Add on PRED types because they can be confusing. 
This is a prerequisite for that to not fail over in the new shape check. PiperOrigin-RevId: 216685015 --- tensorflow/compiler/tf2xla/kernels/tile_ops.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc index 93d5996b5e..52f2b36e19 100644 --- a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc @@ -96,7 +96,11 @@ class TileOp : public XlaOpKernel { // operation broadcast semantics. auto broadcasted_zero = xla::Broadcast( XlaHelpers::Zero(ctx->builder(), ctx->input_type(0)), output_shape); - ctx->SetOutput(0, xla::Add(broadcasted_zero, input)); + if (ctx->input_type(0) == DT_BOOL) { + ctx->SetOutput(0, xla::Or(broadcasted_zero, input)); + } else { + ctx->SetOutput(0, xla::Add(broadcasted_zero, input)); + } return; } -- GitLab From fe18d063a9e277583329ee017485ccb2196d2ce9 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Thu, 11 Oct 2018 06:25:35 -0700 Subject: [PATCH 268/411] Updated model and test data for microcontroller speech example PiperOrigin-RevId: 216686136 --- .../micro/examples/micro_speech/BUILD | 4 + .../micro/examples/micro_speech/README.md | 103 + .../micro_speech/micro_speech_test.cc | 82 + .../examples/micro_speech/no_features_data.cc | 152 + .../examples/micro_speech/no_features_data.h | 23 + .../micro_speech/tiny_conv_model_data.cc | 3235 +++++++++-------- .../micro_speech/yes_features_data.cc | 158 + .../examples/micro_speech/yes_features_data.h | 23 + .../experimental/micro/testing/micro_test.h | 18 + .../experimental/micro/tools/make/Makefile | 4 +- 10 files changed, 2184 insertions(+), 1618 deletions(-) create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc create mode 100644
tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD index dad58b6c1c..626f733540 100644 --- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD @@ -14,8 +14,12 @@ tflite_micro_cc_test( name = "micro_speech_test", srcs = [ "micro_speech_test.cc", + "no_features_data.cc", + "no_features_data.h", "tiny_conv_model_data.cc", "tiny_conv_model_data.h", + "yes_features_data.cc", + "yes_features_data.h", ], tags = [ "nomsan", diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md new file mode 100644 index 0000000000..438a432356 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md @@ -0,0 +1,103 @@ +# Micro Speech Example + +This example shows how you can use TensorFlow Lite to run a 20 kilobyte neural network model to recognize keywords in speech. It's designed to run on systems with very small amounts of memory such as microcontrollers and DSPs. The code itself also has a small footprint (for example around 22 kilobytes on a Cortex M3) and only uses about 10 kilobytes of RAM for working memory, so it's able to run on systems like an STM32F103 with only 20 kilobytes of total SRAM and 64 kilobytes of Flash.
+ +## Table of Contents + + * [Getting Started](#getting-started) + * [Getting Started on a Microcontroller](#getting-started-on-a-microcontroller) + * [Calculating the Input to the Neural Network](#calculating-the-input-to-the-neural-network) + * [Creating Your Own Model](#creating-your-own-model) + +## Getting Started + +To compile and test this example on a desktop Linux or MacOS machine, download [the TensorFlow source code](https://github.com/tensorflow/tensorflow), `cd` into the source directory from a terminal, and then retrieve the support libraries you need by running: + +``` +tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh +``` + +This will take a few minutes, and downloads frameworks the code uses like [CMSIS](https://developer.arm.com/embedded/cmsis) and [flatbuffers](https://google.github.io/flatbuffers/). Once that process has finished, run: + +``` +make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile test_micro_speech +``` + +You should see a series of files get compiled, followed by some logging output from a test, which should conclude with "~~~ALL TESTS PASSED~~~". If you see this, it means that a small program has been built and run that loads a trained TensorFlow model, runs some example inputs through it, and got the expected outputs. This particular test runs spectrograms generated from recordings of people saying "Yes" and "No", and checks that the network correctly identifies them. + +To understand how TensorFlow Lite does this, you can look at the `TestInvoke()` function in [micro_speech_test.cc](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc). It's a fairly small amount of code, creating an interpreter, getting a handle to a model that's been compiled into the program, and then invoking the interpreter with the model and sample inputs. 
+ +## Getting Started on a Microcontroller + +Once you have downloaded the dependencies and got the x86/Linux build working, you can try building a version for the STM32F103 'bluepill' device. The following command will build the test and then run it on an emulator, assuming you have Docker installed: + +``` +make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile TARGET=bluepill test_micro_speech +``` + +If you have a real device [(see here for how to set one up)](https://github.com/google/stm32_bare_lib/tree/master/README.md) you can then convert the ELF file into a `.bin` format executable to load onto it by running: + +``` +arm-none-eabi-objcopy \ +tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/micro_speech_test \ +tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/micro_speech_test.bin \ +--output binary +``` + +## Calculating the Input to the Neural Network + +The TensorFlow Lite model doesn't take in raw audio sample data. Instead it works with spectrograms, which are two dimensional arrays that are made up of slices of frequency information, each taken from a different time window. This test uses spectrograms that have been pre-calculated from one-second WAV files in the test data set. In a complete application these spectrograms would be calculated at runtime from microphone inputs, but the code for doing that is not yet included in this sample code. + +The recipe for creating the spectrogram data is that each frequency slice is created by running an FFT across a 30ms section of the audio sample data. The input samples are treated as being between -1 and +1 as real values (encoded as -32,768 and 32,767 in 16-bit signed integer samples). This results in an FFT with 256 entries. Every sequence of six entries is averaged together, giving a total of 43 frequency buckets in the final slice.
The results are stored as unsigned eight-bit values, where 0 represents a real number of zero, and 255 represents 127.5 as a real number. Each adjacent frequency entry is stored in ascending memory order (frequency bucket 0 at data[0], bucket 1 at data [1], etc). The window for the frequency analysis is then moved forward by 20ms, and the process repeated, storing the results in the next memory row (for example bucket 0 in this moved window would be in data[43 + 0], etc). This process happens 49 times in total, producing a single channel image that is 43 pixels wide, and 49 rows high. Here's an illustration of the process: + +![spectrogram diagram](https://storage.googleapis.com/download.tensorflow.org/example_images/spectrogram_diagram.png) + + +The test data files have been generated by running the following commands: + +``` +bazel run tensorflow/examples/speech_commands:wav_to_features -- \ +--input_wav=${HOME}/speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav \ +--output_c_file=yes_features_data.cc \ +--window_stride=20 --preprocess=average --quantize=1 + +bazel run tensorflow/examples/speech_commands:wav_to_features -- \ +--input_wav=${HOME}/speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav \ +--output_c_file=no_features_data.cc \ +--window_stride=20 --preprocess=average --quantize=1 +``` + +## Creating Your Own Model + +The neural network model used in this example was built using the [TensorFlow speech commands tutorial](https://www.tensorflow.org/tutorials/sequences/audio_recognition). 
If you would like to create your own, you can start by training a model with this command: + +``` +bazel run -c opt --copt=-mavx2 --copt=-mfma \ +tensorflow/examples/speech_commands:train -- \ +--model_architecture=tiny_conv --window_stride=20 --preprocess=average \ +--wanted_words="yes,no" --silence_percentage=25 --unknown_percentage=25 --quantize=1 +``` + +If you see a compiling error on older machines, try leaving out the `--copt` arguments, they are just there to accelerate training on chips that support the extensions. The training process is likely to take a couple of hours. Once it has completed, the next step is to freeze the variables: + +``` +bazel run tensorflow/examples/speech_commands:freeze -- \ +--model_architecture=tiny_conv --window_stride=20 --preprocess=average \ +--wanted_words="yes,no" --quantize=1 --output_file=/tmp/tiny_conv.pb +``` + +The next step is to create a TensorFlow Lite file from the frozen graph: + +``` +bazel run tensorflow/contrib/lite/toco:toco -- \ +--input_file=/tmp/tiny_conv.pb --output_file=/tmp/tiny_conv.tflite \ +--input_shapes=1,49,43,1 --input_arrays=Reshape_1 --output_arrays='labels_softmax' \ +--inference_type=QUANTIZED_UINT8 --mean_values=0 --std_values=2 \ +--change_concat_input_ranges=false +``` + +Finally, convert the file into a C source file that can be compiled into an embedded system: + +``` +xxd -i /tmp/tiny_conv.tflite > /tmp/tiny_conv_model_data.cc +``` diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc index 86cd056a72..0f4731fd4b 100644 --- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h" #include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h" +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h" #include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h" #include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h" #include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h" @@ -24,9 +26,12 @@ limitations under the License. TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestInvoke) { + // Set up logging. tflite::MicroErrorReporter micro_error_reporter; tflite::ErrorReporter* error_reporter = µ_error_reporter; + // Map the model into a usable data structure. This doesn't involve any + // copying or parsing, it's a very lightweight operation. const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data); if (model->version() != TFLITE_SCHEMA_VERSION) { error_reporter->Report( @@ -34,21 +39,98 @@ TF_LITE_MICRO_TEST(TestInvoke) { "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); } + + // This pulls in all the operation implementations we need. tflite::ops::micro::AllOpsResolver resolver; + // Create an area of memory to use for input, output, and intermediate arrays. const int tensor_arena_size = 10 * 1024; uint8_t tensor_arena[tensor_arena_size]; tflite::SimpleTensorAllocator tensor_allocator(tensor_arena, tensor_arena_size); + // Build an interpreter to run the model with. tflite::MicroInterpreter interpreter(model, resolver, &tensor_allocator, error_reporter); + + // Get information about the memory area to use for the model's input. + TfLiteTensor* input = interpreter.input(0); + + // Make sure the input has the properties we expect. 
+ TF_LITE_MICRO_EXPECT_NE(nullptr, input); + TF_LITE_MICRO_EXPECT_EQ(4, input->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(49, input->dims->data[1]); + TF_LITE_MICRO_EXPECT_EQ(43, input->dims->data[2]); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, input->type); + + // Copy a spectrogram created from a .wav audio file of someone saying "Yes", + // into the memory area used for the input. + const uint8_t* yes_features_data = g_yes_f2e59fea_nohash_1_data; + for (int i = 0; i < input->bytes; ++i) { + input->data.uint8[i] = yes_features_data[i]; + } + + // Run the model on this input and make sure it succeeds. TfLiteStatus invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { error_reporter->Report("Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); + // Get the output from the model, and make sure it's the expected size and + // type. + TfLiteTensor* output = interpreter.output(0); + TF_LITE_MICRO_EXPECT_EQ(2, output->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(4, output->dims->data[1]); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, output->type); + + // There are four possible classes in the output, each with a score. + const int kSilenceIndex = 0; + const int kUnknownIndex = 1; + const int kYesIndex = 2; + const int kNoIndex = 3; + + // Make sure that the expected "Yes" score is higher than the other classes. + uint8_t silence_score = output->data.uint8[kSilenceIndex]; + uint8_t unknown_score = output->data.uint8[kUnknownIndex]; + uint8_t yes_score = output->data.uint8[kYesIndex]; + uint8_t no_score = output->data.uint8[kNoIndex]; + TF_LITE_MICRO_EXPECT_GT(yes_score, silence_score); + TF_LITE_MICRO_EXPECT_GT(yes_score, unknown_score); + TF_LITE_MICRO_EXPECT_GT(yes_score, no_score); + + // Now test with a different input, from a recording of "No". 
+ const uint8_t* no_features_data = g_no_f9643d42_nohash_4_data; + for (int i = 0; i < input->bytes; ++i) { + input->data.uint8[i] = no_features_data[i]; + } + + // Run the model on this "No" input. + invoke_status = interpreter.Invoke(); + if (invoke_status != kTfLiteOk) { + error_reporter->Report("Invoke failed\n"); + } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); + + // Get the output from the model, and make sure it's the expected size and + // type. + output = interpreter.output(0); + TF_LITE_MICRO_EXPECT_EQ(2, output->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(4, output->dims->data[1]); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, output->type); + + // Make sure that the expected "No" score is higher than the other classes. + silence_score = output->data.uint8[kSilenceIndex]; + unknown_score = output->data.uint8[kUnknownIndex]; + yes_score = output->data.uint8[kYesIndex]; + no_score = output->data.uint8[kNoIndex]; + TF_LITE_MICRO_EXPECT_GT(no_score, silence_score); + TF_LITE_MICRO_EXPECT_GT(no_score, unknown_score); + TF_LITE_MICRO_EXPECT_GT(no_score, yes_score); + error_reporter->Report("Ran successfully\n"); } diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc new file mode 100644 index 0000000000..3615deb26c --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc @@ -0,0 +1,152 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h" + +/* File automatically created by + * tensorflow/examples/speech_commands/wav_to_features.py \ + * --sample_rate=16000 \ + * --clip_duration_ms=1000 \ + * --window_size_ms=30 \ + * --window_stride_ms=20 \ + * --feature_bin_count=40 \ + * --quantize \ + * --preprocess="average" \ + * --input_wav="speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav" \ + * --output_c_file="no_features_data.cc" \ + */ + +const int g_no_f9643d42_nohash_4_width = 43; +const int g_no_f9643d42_nohash_4_height = 49; +const unsigned char g_no_f9643d42_nohash_4_data[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 67, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 195, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 230, 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 7, + 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 255, 7, 16, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 7, 22, 0, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 238, 5, 20, 3, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 144, 4, 19, 3, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 6, 3, + 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 1, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 1, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, +}; diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h new file mode 100644 index 0000000000..b53d0a202b --- /dev/null +++ 
b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h @@ -0,0 +1,23 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_ + +extern const int g_no_f9643d42_nohash_4_width; +extern const int g_no_f9643d42_nohash_4_height; +extern const unsigned char g_no_f9643d42_nohash_4_data[]; + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc index f1f9e0e219..f0769a1237 100644 --- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc @@ -15,6 +15,7 @@ limitations under the License. // Automatically created from a TensorFlow Lite flatbuffer using the command: // xxd -i tiny_conv.tflite > tiny_conv_model_data.cc +// See the README for a full description of the creation process. 
#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h" @@ -26,1643 +27,1643 @@ const unsigned char g_tiny_conv_model_data[] = { 0x01, 0x00, 0x00, 0x00, 0xf4, 0x47, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0xd4, 0x47, 0x00, 0x00, - 0x04, 0x03, 0x00, 0x00, 0xfc, 0x02, 0x00, 0x00, 0xf4, 0x02, 0x00, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, + 0xb4, 0x47, 0x00, 0x00, 0xe4, 0x02, 0x00, 0x00, 0xb4, 0x02, 0x00, 0x00, + 0xac, 0x02, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0xb3, 0xff, 0xff, - 0x16, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xd7, 0x02, 0x00, 0x00, 0x2f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0xb3, 0xff, 0xff, - 0x46, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0xab, 0x00, 0x00, 0x00, 0x1e, 0xff, 0xff, 0xff, 0xed, 0xff, 0xff, 0xff, - 0x4a, 0x00, 0x00, 0x00, 0x62, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x80, 0x02, 0x00, 0x00, 0xce, 0xad, 0xaf, 0x3c, 0xc8, 0xe9, 0xb0, 0x83, - 0xa1, 0xbf, 0xb2, 0xb1, 0xab, 0xd0, 0xa7, 0x53, 0xa5, 0xe9, 0xb5, 0xac, - 0xa2, 0xd3, 0xc4, 0x9e, 0x8b, 0xb2, 0x64, 0xb3, 0x9d, 0xa2, 0xae, 0xa6, - 0xd5, 0xbe, 0x43, 0x9f, 0x9c, 0x54, 0xb5, 0xa8, 0x49, 0x78, 0x86, 0xa2, - 0xa3, 0x55, 0x35, 0x96, 0x3d, 0x7f, 0xe2, 0xb5, 0xb0, 0x47, 0x28, 0xa9, - 0x9d, 0xbb, 0xd6, 0xff, 0xb7, 0x79, 0x63, 0xb5, 0xaf, 0xa7, 0xab, 0x7e, - 0xbc, 0xc7, 0xa0, 0xc3, 0xb1, 0xb6, 0xb2, 0xa1, 0xc2, 0xbb, 0x79, 0x57, - 0xbe, 0xc1, 0xb7, 0xb0, 0x6b, 0xb7, 0xa5, 0x75, 0x97, 0xb8, 0xe7, 0xac, - 0xad, 0x7e, 0xb1, 0x9b, 0xc3, 0xba, 0x6b, 0xa2, 0x7f, 0x58, 0xb9, 0x7a, - 0x4c, 0x91, 0x74, 0x9e, 0xa7, 0x3d, 0xc2, 0x94, 0x75, 
0xa1, 0xa4, 0xac, - 0xab, 0x45, 0x2e, 0xb4, 0xb6, 0xbf, 0xc1, 0xdb, 0xaf, 0x6c, 0x67, 0xb1, - 0xa9, 0xa6, 0xa8, 0xca, 0xc2, 0xc4, 0xb9, 0xbf, 0xb4, 0xb9, 0xaa, 0x9d, - 0x9f, 0xb9, 0xb2, 0x71, 0xb2, 0xca, 0xbe, 0xaf, 0x5f, 0xbc, 0xa0, 0x5b, - 0xa8, 0xb4, 0xa4, 0xa8, 0xd8, 0x69, 0xb7, 0x8a, 0xbc, 0xb8, 0xaf, 0x9c, - 0x7c, 0x5d, 0xb3, 0x6b, 0x49, 0x95, 0x64, 0xa0, 0xa2, 0x49, 0xcb, 0x87, - 0xa5, 0xb5, 0xa1, 0xb2, 0xa3, 0x40, 0x6d, 0x9f, 0xc5, 0xb6, 0xbb, 0xd4, - 0x9c, 0x6d, 0x69, 0xa9, 0xa8, 0x91, 0xad, 0xb8, 0xd2, 0xc6, 0xaf, 0xb8, - 0xac, 0xa9, 0xa2, 0xa7, 0x60, 0xa6, 0xa1, 0xc9, 0xb8, 0xd6, 0xcf, 0xb1, - 0x56, 0xb4, 0xac, 0x40, 0xae, 0xbd, 0xbf, 0xa2, 0x54, 0x72, 0x9b, 0x8c, - 0xc2, 0xb5, 0xc2, 0x9b, 0x64, 0x6d, 0xb4, 0x62, 0x4e, 0x9b, 0x6c, 0xa6, - 0x8f, 0x4c, 0xca, 0x95, 0xb6, 0xbf, 0x92, 0xae, 0x9c, 0x49, 0xae, 0xb2, - 0xc0, 0xb6, 0xbc, 0xd1, 0xa4, 0x7b, 0x64, 0xa0, 0xa6, 0x81, 0xac, 0xa6, - 0xbd, 0xc8, 0xbc, 0xae, 0xaa, 0x9e, 0x61, 0xb1, 0x57, 0xac, 0xbf, 0xbf, - 0xbb, 0xe0, 0xa6, 0xae, 0x47, 0xc9, 0xbc, 0x57, 0xb0, 0xb5, 0xc7, 0x98, - 0xf4, 0x93, 0xb6, 0x70, 0xc3, 0xb3, 0xca, 0xab, 0x77, 0x9a, 0xac, 0x45, - 0x5c, 0x9e, 0x9a, 0xa9, 0x9b, 0x35, 0xc0, 0x6f, 0xc6, 0xc7, 0x91, 0xb4, - 0xa8, 0x3c, 0xce, 0xb8, 0xad, 0xb9, 0xb5, 0xdd, 0x9c, 0x6d, 0xbf, 0x91, - 0xb2, 0x7d, 0xa0, 0xaf, 0x9f, 0xbd, 0xb9, 0xcf, 0x9b, 0x5d, 0x3f, 0xac, - 0x64, 0xae, 0xaf, 0xb8, 0xbc, 0xb8, 0x86, 0xb5, 0x36, 0xcf, 0xb4, 0xa9, - 0xad, 0xcd, 0xdb, 0xa4, 0x68, 0xa6, 0xa4, 0x67, 0xc8, 0xb7, 0xe5, 0xa4, - 0x76, 0xb8, 0xa8, 0x28, 0x6b, 0xa5, 0xba, 0xad, 0x9f, 0x3a, 0xa5, 0x42, - 0xc5, 0xb0, 0x88, 0xad, 0xa5, 0x4d, 0xea, 0x8a, 0xb8, 0xb5, 0xb3, 0xd9, - 0xa0, 0x77, 0xbb, 0x92, 0x9e, 0x80, 0xbd, 0xbd, 0x6d, 0xcc, 0xab, 0x99, - 0x88, 0x58, 0x4d, 0xb0, 0x6c, 0xbc, 0x96, 0xbd, 0xae, 0xab, 0x5b, 0xac, - 0x2f, 0xc3, 0x9a, 0xbe, 0xac, 0xb3, 0x84, 0x9b, 0xe3, 0xaf, 0x95, 0x6b, - 0xc2, 0xb5, 0xca, 0xb7, 0x4e, 0xbc, 0x9d, 0x24, 0x75, 0xa9, 0xd2, 0xae, - 0xa0, 0x2b, 0x90, 0x34, 0xd1, 0xb5, 0x96, 0xae, 0xaa, 
0x4d, 0xc1, 0xa3, - 0xb1, 0xb4, 0xaa, 0xd2, 0x9c, 0x7d, 0xc0, 0x91, 0x91, 0x7a, 0xb8, 0x83, - 0x44, 0xcb, 0xaf, 0x9b, 0x6b, 0x5b, 0x75, 0xb2, 0x62, 0xb6, 0xaa, 0xcb, - 0x99, 0xa8, 0x63, 0xae, 0x24, 0xc7, 0x8a, 0xbe, 0xa9, 0xb6, 0xa0, 0xa1, - 0x41, 0xac, 0x84, 0xb5, 0xb9, 0xb3, 0x9b, 0xad, 0x77, 0xbf, 0xa8, 0x7e, - 0x82, 0xb9, 0xbe, 0xaa, 0xa3, 0x47, 0x6d, 0xb5, 0xc3, 0xb1, 0xbf, 0xa7, - 0xb1, 0x57, 0x75, 0xb5, 0xb0, 0xb6, 0xb9, 0xce, 0xa4, 0x86, 0xb0, 0xa4, - 0x98, 0x80, 0xc5, 0x3e, 0x90, 0xca, 0x9b, 0xa2, 0x5a, 0x50, 0xc5, 0xa5, - 0xad, 0xc1, 0x9c, 0x91, 0x83, 0x8f, 0x21, 0xab, 0xac, 0xba, 0x70, 0xb4, - 0xae, 0x85, 0x7e, 0xa7, 0xbd, 0xba, 0x7c, 0xb2, 0xb5, 0xb2, 0x7e, 0xb3, - 0xc3, 0xcd, 0x82, 0xac, 0x9b, 0xb3, 0xa6, 0xb0, 0xbc, 0x6f, 0x52, 0xb9, - 0xbf, 0xb1, 0xa6, 0xa4, 0xc1, 0x7a, 0x90, 0xc0, 0xae, 0xab, 0x94, 0xd8, - 0xab, 0xa4, 0x98, 0xbb, 0x8b, 0x86, 0x94, 0x01, 0xad, 0xe7, 0xb1, 0x9b, - 0x57, 0x48, 0xc1, 0x88, 0xbf, 0xcc, 0xb4, 0x4b, 0x62, 0x8b, 0x48, 0xa7, - 0xbe, 0xe1, 0x80, 0xa6, 0xb3, 0x64, 0xaa, 0xa4, 0xcf, 0xba, 0x6d, 0xa6, - 0xb8, 0xa0, 0x8f, 0xb3, 0xce, 0xc3, 0x87, 0xb2, 0xa0, 0xc0, 0x78, 0xb0, - 0xb9, 0xaa, 0x40, 0xb8, 0xd8, 0xa3, 0x9a, 0xaa, 0xcc, 0xa2, 0x9f, 0xb9, - 0xbe, 0xc2, 0x89, 0xd6, 0xc6, 0x9c, 0xa3, 0xc7, 0x94, 0xb6, 0xff, 0xff, - 0x98, 0xb6, 0xff, 0xff, 0xf6, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0xc0, 0x44, 0x00, 0x00, 0x4a, 0x4d, 0x59, 0x60, 0x5a, 0x45, 0x3d, 0x50, - 0x4a, 0x43, 0x3d, 0x59, 0x3e, 0x49, 0x4a, 0x59, 0x45, 0x44, 0x41, 0x5d, - 0x50, 0x2f, 0x4e, 0x34, 0x46, 0x48, 0x41, 0x4a, 0x4c, 0x3b, 0x4b, 0x3e, - 0x49, 0x49, 0x43, 0x4b, 0x3e, 0x49, 0x47, 0x41, 0x3e, 0x4a, 0x46, 0x43, - 0x41, 0x43, 0x47, 0x49, 0x4a, 0x4c, 0x46, 0x58, 0x3f, 0x4c, 0x4b, 0x4c, - 0x4d, 0x4b, 0x45, 0x52, 0x45, 0x42, 0x52, 0x52, 0x48, 0x40, 0x46, 0x5f, - 0x4c, 0x41, 0x47, 0x48, 0x48, 0x4c, 0x43, 0x61, 0x50, 0x4b, 0x49, 0x49, - 0x46, 0x3f, 0x40, 0x67, 0x40, 0x4d, 0x45, 0x40, 0x40, 0x45, 0x47, 0x56, - 0x44, 0x3a, 0x4a, 0x4c, 0x52, 0x48, 0x46, 0x50, 0x4b, 
0x44, 0x51, 0x45, - 0x40, 0x45, 0x45, 0x48, 0x4e, 0x4e, 0x43, 0x48, 0x44, 0x4b, 0x45, 0x4a, - 0x53, 0x45, 0x4a, 0x4b, 0x3f, 0x43, 0x45, 0x53, 0x4d, 0x43, 0x46, 0x3f, - 0x47, 0x4e, 0x51, 0x50, 0x48, 0x4f, 0x4f, 0x4a, 0x4a, 0x4e, 0x45, 0x4e, - 0x46, 0x41, 0x4a, 0x46, 0x45, 0x47, 0x45, 0x4b, 0x50, 0x4c, 0x46, 0x45, - 0x41, 0x47, 0x41, 0x47, 0x46, 0x4f, 0x3f, 0x4f, 0x4a, 0x51, 0x4f, 0x53, - 0x54, 0x48, 0x51, 0x43, 0x4b, 0x48, 0x4d, 0x46, 0x48, 0x4f, 0x49, 0x44, - 0x43, 0x53, 0x50, 0x59, 0x56, 0x3d, 0x45, 0x44, 0x48, 0x38, 0x3b, 0x5f, - 0x39, 0x43, 0x43, 0x52, 0x46, 0x3e, 0x43, 0x58, 0x43, 0x1e, 0x50, 0x3c, - 0x46, 0x4b, 0x46, 0x50, 0x3c, 0x37, 0x4c, 0x47, 0x47, 0x4b, 0x47, 0x54, - 0x43, 0x3e, 0x47, 0x4f, 0x4b, 0x41, 0x53, 0x50, 0x42, 0x46, 0x4f, 0x4b, - 0x4e, 0x3f, 0x49, 0x52, 0x4a, 0x4a, 0x49, 0x53, 0x52, 0x47, 0x52, 0x5a, - 0x40, 0x42, 0x4d, 0x4b, 0x50, 0x43, 0x49, 0x59, 0x47, 0x4c, 0x4d, 0x50, - 0x4e, 0x3c, 0x44, 0x61, 0x51, 0x49, 0x49, 0x46, 0x49, 0x47, 0x4b, 0x5a, - 0x45, 0x4b, 0x43, 0x40, 0x44, 0x52, 0x4d, 0x54, 0x49, 0x47, 0x44, 0x48, - 0x46, 0x48, 0x3e, 0x40, 0x45, 0x4f, 0x4d, 0x4b, 0x4c, 0x40, 0x3d, 0x40, - 0x3e, 0x48, 0x50, 0x4e, 0x4c, 0x42, 0x48, 0x4b, 0x3d, 0x48, 0x4b, 0x44, - 0x52, 0x4b, 0x49, 0x4f, 0x49, 0x3f, 0x47, 0x43, 0x4d, 0x3f, 0x53, 0x4e, - 0x4a, 0x4f, 0x4e, 0x4e, 0x53, 0x42, 0x46, 0x4c, 0x44, 0x4c, 0x46, 0x51, - 0x45, 0x48, 0x4a, 0x50, 0x47, 0x41, 0x45, 0x54, 0x4a, 0x44, 0x50, 0x49, - 0x48, 0x50, 0x51, 0x4b, 0x50, 0x4c, 0x4a, 0x49, 0x43, 0x47, 0x50, 0x4a, - 0x4d, 0x4c, 0x4e, 0x49, 0x42, 0x50, 0x52, 0x48, 0x45, 0x5a, 0x4e, 0x55, - 0x51, 0x3d, 0x3d, 0x4d, 0x42, 0x32, 0x36, 0x64, 0x39, 0x4c, 0x41, 0x48, - 0x44, 0x35, 0x43, 0x56, 0x47, 0x1e, 0x4b, 0x3e, 0x47, 0x3f, 0x43, 0x52, - 0x51, 0x34, 0x41, 0x4d, 0x3e, 0x41, 0x41, 0x48, 0x3c, 0x4b, 0x45, 0x3b, - 0x40, 0x43, 0x4c, 0x46, 0x46, 0x47, 0x3e, 0x4f, 0x4b, 0x48, 0x42, 0x47, - 0x4e, 0x3e, 0x49, 0x47, 0x43, 0x43, 0x4e, 0x52, 0x51, 0x45, 0x3f, 0x54, - 0x46, 0x44, 0x48, 0x5d, 0x3e, 0x4a, 0x47, 0x52, 0x53, 
0x3a, 0x4f, 0x5d, - 0x41, 0x4c, 0x48, 0x51, 0x43, 0x4b, 0x4b, 0x67, 0x48, 0x4b, 0x45, 0x4d, - 0x4b, 0x43, 0x4a, 0x54, 0x4c, 0x46, 0x43, 0x4a, 0x4d, 0x43, 0x4c, 0x47, - 0x4a, 0x48, 0x4d, 0x42, 0x4d, 0x48, 0x3f, 0x43, 0x4c, 0x44, 0x4e, 0x4c, - 0x40, 0x45, 0x4b, 0x48, 0x47, 0x47, 0x3e, 0x4c, 0x52, 0x41, 0x44, 0x4e, - 0x4d, 0x44, 0x49, 0x4d, 0x3d, 0x45, 0x48, 0x4f, 0x4c, 0x4a, 0x55, 0x51, - 0x4d, 0x4c, 0x45, 0x4e, 0x46, 0x45, 0x44, 0x49, 0x4e, 0x44, 0x40, 0x48, - 0x49, 0x44, 0x53, 0x51, 0x42, 0x41, 0x51, 0x49, 0x51, 0x45, 0x51, 0x3f, - 0x4b, 0x3f, 0x52, 0x3c, 0x50, 0x4d, 0x4f, 0x4b, 0x44, 0x4f, 0x40, 0x52, - 0x49, 0x4a, 0x50, 0x3f, 0x3d, 0x54, 0x4c, 0x53, 0x52, 0x45, 0x41, 0x43, - 0x47, 0x2d, 0x40, 0x63, 0x3a, 0x51, 0x43, 0x4e, 0x40, 0x2b, 0x36, 0x5b, - 0x4b, 0x12, 0x4d, 0x35, 0x4b, 0x3f, 0x44, 0x4a, 0x46, 0x31, 0x54, 0x48, - 0x43, 0x42, 0x3d, 0x51, 0x41, 0x45, 0x49, 0x4b, 0x47, 0x49, 0x3d, 0x3e, - 0x46, 0x3d, 0x4d, 0x48, 0x3d, 0x45, 0x48, 0x4b, 0x49, 0x52, 0x44, 0x4c, - 0x45, 0x44, 0x45, 0x49, 0x50, 0x48, 0x45, 0x46, 0x45, 0x44, 0x52, 0x55, - 0x46, 0x45, 0x4b, 0x3d, 0x42, 0x4a, 0x3e, 0x57, 0x48, 0x4b, 0x3c, 0x42, - 0x4a, 0x46, 0x47, 0x6c, 0x54, 0x4b, 0x41, 0x49, 0x49, 0x50, 0x43, 0x56, - 0x44, 0x43, 0x4d, 0x3e, 0x44, 0x41, 0x47, 0x40, 0x4a, 0x4b, 0x4d, 0x4d, - 0x3e, 0x46, 0x45, 0x47, 0x3e, 0x42, 0x4a, 0x45, 0x49, 0x3d, 0x3f, 0x43, - 0x40, 0x44, 0x47, 0x4a, 0x45, 0x4d, 0x4b, 0x4c, 0x43, 0x40, 0x3d, 0x3e, - 0x4c, 0x4c, 0x42, 0x4d, 0x48, 0x4d, 0x49, 0x42, 0x51, 0x51, 0x4c, 0x4b, - 0x53, 0x4f, 0x48, 0x4d, 0x40, 0x46, 0x45, 0x4b, 0x47, 0x47, 0x4b, 0x46, - 0x54, 0x42, 0x42, 0x46, 0x46, 0x4a, 0x4c, 0x55, 0x3f, 0x3c, 0x52, 0x4b, - 0x4b, 0x4d, 0x4e, 0x48, 0x53, 0x4c, 0x4b, 0x42, 0x52, 0x54, 0x50, 0x4b, - 0x40, 0x5f, 0x58, 0x53, 0x50, 0x42, 0x35, 0x48, 0x39, 0x24, 0x3c, 0x5e, - 0x41, 0x50, 0x3c, 0x51, 0x42, 0x26, 0x42, 0x56, 0x41, 0x0c, 0x3e, 0x3d, - 0x48, 0x3e, 0x50, 0x4b, 0x3a, 0x2c, 0x43, 0x3d, 0x48, 0x3e, 0x43, 0x48, - 0x4c, 0x3f, 0x4a, 0x3e, 0x51, 0x4a, 0x4f, 0x40, 0x47, 
0x43, 0x50, 0x4c, - 0x43, 0x4d, 0x3f, 0x45, 0x4d, 0x3e, 0x4c, 0x44, 0x51, 0x47, 0x4b, 0x51, - 0x45, 0x49, 0x44, 0x3f, 0x46, 0x46, 0x46, 0x57, 0x49, 0x4c, 0x49, 0x4e, - 0x47, 0x4c, 0x47, 0x5e, 0x43, 0x46, 0x45, 0x4b, 0x52, 0x49, 0x45, 0x5f, - 0x47, 0x41, 0x46, 0x43, 0x4f, 0x3b, 0x43, 0x51, 0x46, 0x53, 0x4a, 0x4e, - 0x4b, 0x43, 0x4e, 0x40, 0x48, 0x49, 0x46, 0x3f, 0x48, 0x50, 0x4b, 0x41, - 0x4a, 0x47, 0x4b, 0x3d, 0x46, 0x49, 0x4b, 0x43, 0x43, 0x42, 0x3e, 0x47, - 0x47, 0x4a, 0x45, 0x46, 0x51, 0x48, 0x51, 0x4e, 0x3f, 0x50, 0x44, 0x4b, - 0x4d, 0x4e, 0x44, 0x4d, 0x3d, 0x49, 0x4a, 0x4e, 0x42, 0x51, 0x43, 0x42, - 0x46, 0x3e, 0x48, 0x4b, 0x4f, 0x50, 0x3d, 0x48, 0x4c, 0x4f, 0x46, 0x44, - 0x44, 0x48, 0x42, 0x4b, 0x48, 0x41, 0x43, 0x46, 0x4d, 0x49, 0x4f, 0x43, - 0x41, 0x44, 0x3f, 0x3d, 0x45, 0x4f, 0x45, 0x41, 0x40, 0x58, 0x4f, 0x54, - 0x5b, 0x4b, 0x3a, 0x47, 0x3d, 0x28, 0x3d, 0x57, 0x3e, 0x51, 0x3f, 0x47, - 0x3f, 0x2e, 0x3e, 0x54, 0x4e, 0x0b, 0x41, 0x3d, 0x3b, 0x3d, 0x43, 0x47, - 0x47, 0x28, 0x4d, 0x43, 0x43, 0x3b, 0x4e, 0x4a, 0x4d, 0x42, 0x51, 0x46, - 0x4f, 0x3d, 0x4c, 0x3a, 0x49, 0x49, 0x4a, 0x43, 0x42, 0x4b, 0x47, 0x42, - 0x42, 0x49, 0x3f, 0x4d, 0x46, 0x4a, 0x49, 0x4e, 0x42, 0x3c, 0x4a, 0x41, - 0x4c, 0x40, 0x4d, 0x5a, 0x49, 0x46, 0x51, 0x46, 0x4b, 0x4c, 0x46, 0x62, - 0x45, 0x42, 0x51, 0x4e, 0x4d, 0x3e, 0x4d, 0x5b, 0x4d, 0x43, 0x45, 0x50, - 0x4b, 0x40, 0x50, 0x53, 0x4f, 0x4f, 0x51, 0x53, 0x46, 0x41, 0x4e, 0x3a, - 0x4b, 0x47, 0x3f, 0x3e, 0x4d, 0x48, 0x53, 0x3f, 0x45, 0x42, 0x4c, 0x45, - 0x55, 0x4c, 0x4b, 0x39, 0x4a, 0x45, 0x48, 0x4d, 0x47, 0x40, 0x48, 0x4f, - 0x4d, 0x49, 0x3e, 0x41, 0x46, 0x4e, 0x40, 0x49, 0x4b, 0x47, 0x4c, 0x45, - 0x44, 0x51, 0x4f, 0x4b, 0x48, 0x49, 0x44, 0x41, 0x43, 0x46, 0x51, 0x45, - 0x40, 0x48, 0x4b, 0x42, 0x44, 0x4f, 0x53, 0x4d, 0x44, 0x46, 0x4e, 0x4c, - 0x48, 0x50, 0x41, 0x45, 0x42, 0x48, 0x4d, 0x4d, 0x47, 0x45, 0x41, 0x45, - 0x48, 0x58, 0x4e, 0x46, 0x43, 0x53, 0x57, 0x52, 0x5e, 0x42, 0x45, 0x4e, - 0x39, 0x24, 0x32, 0x56, 0x47, 0x56, 0x49, 0x52, 0x46, 
0x26, 0x3a, 0x51, - 0x4b, 0x05, 0x3e, 0x43, 0x3f, 0x38, 0x4d, 0x4b, 0x4f, 0x27, 0x51, 0x46, - 0x47, 0x41, 0x4a, 0x47, 0x4a, 0x3e, 0x44, 0x51, 0x3f, 0x3a, 0x43, 0x46, - 0x4d, 0x49, 0x46, 0x52, 0x43, 0x48, 0x49, 0x3e, 0x47, 0x46, 0x4a, 0x4d, - 0x47, 0x46, 0x52, 0x50, 0x44, 0x48, 0x4c, 0x47, 0x45, 0x41, 0x49, 0x5b, - 0x4d, 0x4b, 0x47, 0x4c, 0x4a, 0x47, 0x45, 0x5b, 0x49, 0x46, 0x52, 0x47, - 0x47, 0x3d, 0x55, 0x59, 0x40, 0x4b, 0x3e, 0x50, 0x42, 0x43, 0x40, 0x4f, - 0x48, 0x3f, 0x47, 0x53, 0x4d, 0x44, 0x4e, 0x37, 0x4c, 0x43, 0x51, 0x4d, - 0x46, 0x4e, 0x40, 0x41, 0x52, 0x44, 0x43, 0x4a, 0x50, 0x48, 0x47, 0x42, - 0x48, 0x45, 0x50, 0x4d, 0x42, 0x52, 0x44, 0x43, 0x45, 0x43, 0x4c, 0x4d, - 0x44, 0x51, 0x47, 0x48, 0x51, 0x4f, 0x48, 0x45, 0x49, 0x4a, 0x3e, 0x43, - 0x4d, 0x4e, 0x4e, 0x46, 0x54, 0x4d, 0x49, 0x4d, 0x47, 0x46, 0x4b, 0x41, - 0x4a, 0x49, 0x44, 0x45, 0x4d, 0x3e, 0x53, 0x50, 0x47, 0x4d, 0x4e, 0x43, - 0x4f, 0x45, 0x4e, 0x4a, 0x47, 0x49, 0x4c, 0x4c, 0x4d, 0x54, 0x42, 0x4c, - 0x43, 0x5d, 0x59, 0x50, 0x5e, 0x4b, 0x44, 0x43, 0x3c, 0x25, 0x31, 0x5b, - 0x46, 0x5a, 0x50, 0x4d, 0x41, 0x2a, 0x41, 0x4f, 0x44, 0x00, 0x41, 0x3d, - 0x43, 0x4b, 0x47, 0x45, 0x4e, 0x2e, 0x44, 0x46, 0x53, 0x3d, 0x43, 0x41, - 0x44, 0x46, 0x49, 0x42, 0x45, 0x4f, 0x4d, 0x3a, 0x43, 0x3c, 0x47, 0x53, - 0x43, 0x4e, 0x3f, 0x41, 0x4d, 0x50, 0x4b, 0x4c, 0x51, 0x47, 0x53, 0x4f, - 0x45, 0x4a, 0x44, 0x45, 0x41, 0x46, 0x47, 0x50, 0x51, 0x3f, 0x3e, 0x41, - 0x48, 0x45, 0x46, 0x5d, 0x45, 0x4a, 0x4c, 0x46, 0x4a, 0x49, 0x50, 0x51, - 0x51, 0x4c, 0x4f, 0x47, 0x47, 0x42, 0x45, 0x47, 0x4e, 0x48, 0x46, 0x40, - 0x45, 0x46, 0x4d, 0x3b, 0x4d, 0x52, 0x4c, 0x51, 0x49, 0x51, 0x47, 0x3d, - 0x4d, 0x42, 0x4f, 0x4e, 0x43, 0x43, 0x45, 0x3a, 0x42, 0x50, 0x4c, 0x4a, - 0x41, 0x53, 0x4c, 0x45, 0x51, 0x3f, 0x54, 0x43, 0x4b, 0x54, 0x56, 0x4d, - 0x4f, 0x4a, 0x50, 0x4b, 0x44, 0x45, 0x4f, 0x4f, 0x47, 0x3e, 0x50, 0x4f, - 0x4b, 0x48, 0x4d, 0x49, 0x55, 0x4d, 0x45, 0x4d, 0x4a, 0x53, 0x43, 0x46, - 0x4c, 0x45, 0x41, 0x46, 0x49, 0x49, 0x4f, 0x4b, 0x49, 
0x50, 0x52, 0x49, - 0x41, 0x54, 0x44, 0x4c, 0x44, 0x63, 0x4a, 0x49, 0x40, 0x59, 0x52, 0x52, - 0x59, 0x3f, 0x3e, 0x3e, 0x40, 0x25, 0x3c, 0x5c, 0x4f, 0x57, 0x44, 0x50, - 0x41, 0x2a, 0x48, 0x4f, 0x43, 0x08, 0x47, 0x43, 0x49, 0x48, 0x4d, 0x49, - 0x46, 0x2b, 0x48, 0x44, 0x4e, 0x47, 0x47, 0x43, 0x44, 0x3e, 0x4a, 0x52, - 0x3f, 0x4a, 0x53, 0x42, 0x49, 0x47, 0x4c, 0x50, 0x43, 0x46, 0x46, 0x3c, - 0x4c, 0x47, 0x4e, 0x4d, 0x42, 0x41, 0x53, 0x52, 0x4f, 0x40, 0x54, 0x50, - 0x46, 0x43, 0x50, 0x56, 0x51, 0x48, 0x48, 0x48, 0x49, 0x39, 0x47, 0x5e, - 0x4e, 0x4b, 0x4f, 0x4e, 0x43, 0x45, 0x42, 0x58, 0x4a, 0x3b, 0x48, 0x4d, - 0x43, 0x3e, 0x4b, 0x43, 0x3c, 0x45, 0x46, 0x4b, 0x42, 0x42, 0x4e, 0x3d, - 0x4b, 0x4e, 0x51, 0x52, 0x48, 0x3e, 0x4b, 0x3f, 0x4c, 0x4a, 0x4b, 0x4c, - 0x46, 0x48, 0x3e, 0x48, 0x47, 0x4d, 0x4a, 0x46, 0x49, 0x4d, 0x4a, 0x48, - 0x50, 0x4b, 0x40, 0x48, 0x4b, 0x52, 0x46, 0x50, 0x4f, 0x3e, 0x42, 0x44, - 0x44, 0x42, 0x43, 0x49, 0x4f, 0x4f, 0x46, 0x42, 0x4a, 0x54, 0x42, 0x48, - 0x50, 0x4f, 0x4f, 0x4c, 0x4c, 0x47, 0x52, 0x49, 0x4c, 0x45, 0x4a, 0x4d, - 0x4a, 0x41, 0x47, 0x4a, 0x4d, 0x4a, 0x4c, 0x46, 0x51, 0x44, 0x4b, 0x49, - 0x53, 0x5e, 0x45, 0x4a, 0x3b, 0x57, 0x5a, 0x4c, 0x59, 0x43, 0x3e, 0x4a, - 0x3e, 0x20, 0x36, 0x5d, 0x47, 0x5b, 0x3f, 0x55, 0x3e, 0x24, 0x41, 0x52, - 0x3f, 0x01, 0x49, 0x41, 0x40, 0x45, 0x42, 0x46, 0x49, 0x2a, 0x47, 0x40, - 0x44, 0x3f, 0x42, 0x47, 0x4e, 0x42, 0x4b, 0x3d, 0x45, 0x4c, 0x47, 0x3d, - 0x4c, 0x44, 0x48, 0x43, 0x43, 0x41, 0x4a, 0x3d, 0x48, 0x4b, 0x46, 0x4e, - 0x4c, 0x45, 0x48, 0x4d, 0x54, 0x4d, 0x3e, 0x46, 0x3e, 0x47, 0x44, 0x4e, - 0x48, 0x49, 0x53, 0x4b, 0x41, 0x45, 0x4c, 0x57, 0x52, 0x4e, 0x40, 0x48, - 0x4d, 0x43, 0x44, 0x5a, 0x4a, 0x4c, 0x48, 0x4d, 0x3f, 0x52, 0x41, 0x50, - 0x4a, 0x47, 0x3e, 0x43, 0x4c, 0x42, 0x48, 0x3e, 0x4f, 0x4b, 0x41, 0x43, - 0x49, 0x40, 0x43, 0x36, 0x3f, 0x4b, 0x49, 0x49, 0x51, 0x43, 0x48, 0x40, - 0x4c, 0x51, 0x4d, 0x4a, 0x49, 0x3f, 0x4b, 0x3d, 0x4f, 0x4b, 0x43, 0x4d, - 0x46, 0x40, 0x46, 0x4d, 0x49, 0x48, 0x4d, 0x4c, 0x52, 
0x4c, 0x49, 0x4f, - 0x53, 0x40, 0x49, 0x53, 0x47, 0x43, 0x4c, 0x45, 0x42, 0x48, 0x42, 0x4e, - 0x49, 0x43, 0x42, 0x40, 0x4f, 0x46, 0x50, 0x47, 0x51, 0x4a, 0x52, 0x45, - 0x4c, 0x51, 0x48, 0x47, 0x40, 0x41, 0x52, 0x4f, 0x41, 0x5a, 0x53, 0x47, - 0x42, 0x5f, 0x55, 0x4f, 0x53, 0x3e, 0x41, 0x49, 0x3d, 0x20, 0x3f, 0x54, - 0x42, 0x5b, 0x49, 0x4d, 0x3d, 0x22, 0x3e, 0x48, 0x41, 0x01, 0x4c, 0x3d, - 0x43, 0x4a, 0x46, 0x43, 0x4f, 0x2b, 0x49, 0x46, 0x47, 0x4a, 0x51, 0x3d, - 0x4b, 0x44, 0x49, 0x41, 0x47, 0x47, 0x45, 0x3a, 0x44, 0x42, 0x40, 0x52, - 0x46, 0x51, 0x4a, 0x41, 0x4a, 0x52, 0x44, 0x52, 0x4a, 0x40, 0x46, 0x45, - 0x52, 0x4c, 0x4e, 0x42, 0x42, 0x48, 0x40, 0x4f, 0x4b, 0x4f, 0x51, 0x4c, - 0x4e, 0x48, 0x4a, 0x5a, 0x46, 0x3d, 0x41, 0x50, 0x52, 0x4c, 0x44, 0x53, - 0x4b, 0x4d, 0x4f, 0x49, 0x47, 0x4c, 0x48, 0x45, 0x48, 0x4a, 0x44, 0x4e, - 0x4c, 0x40, 0x4d, 0x35, 0x40, 0x49, 0x4a, 0x51, 0x49, 0x4a, 0x46, 0x36, - 0x46, 0x47, 0x4a, 0x4c, 0x40, 0x4e, 0x42, 0x38, 0x48, 0x45, 0x42, 0x49, - 0x54, 0x4c, 0x3f, 0x49, 0x4c, 0x39, 0x47, 0x45, 0x4e, 0x4a, 0x42, 0x44, - 0x4b, 0x53, 0x43, 0x40, 0x46, 0x51, 0x3d, 0x50, 0x4b, 0x43, 0x4a, 0x4c, - 0x55, 0x54, 0x4a, 0x43, 0x48, 0x40, 0x44, 0x3f, 0x47, 0x45, 0x3e, 0x41, - 0x49, 0x44, 0x4d, 0x49, 0x44, 0x41, 0x4a, 0x50, 0x44, 0x49, 0x4d, 0x47, - 0x4a, 0x49, 0x46, 0x49, 0x40, 0x5b, 0x4d, 0x51, 0x47, 0x57, 0x49, 0x4f, - 0x56, 0x46, 0x3a, 0x4a, 0x3e, 0x22, 0x36, 0x5c, 0x44, 0x56, 0x46, 0x48, - 0x3a, 0x2d, 0x4a, 0x48, 0x44, 0x17, 0x41, 0x42, 0x40, 0x3d, 0x4e, 0x45, - 0x40, 0x26, 0x43, 0x52, 0x41, 0x40, 0x44, 0x4a, 0x48, 0x42, 0x4f, 0x47, - 0x46, 0x4c, 0x4a, 0x3b, 0x42, 0x3e, 0x3e, 0x49, 0x4e, 0x44, 0x4e, 0x49, - 0x47, 0x41, 0x47, 0x44, 0x4c, 0x45, 0x4d, 0x49, 0x49, 0x48, 0x55, 0x3d, - 0x4a, 0x45, 0x50, 0x4f, 0x46, 0x4c, 0x46, 0x45, 0x3c, 0x51, 0x4b, 0x5a, - 0x46, 0x47, 0x54, 0x41, 0x44, 0x40, 0x4f, 0x53, 0x49, 0x46, 0x46, 0x48, - 0x44, 0x40, 0x50, 0x49, 0x49, 0x43, 0x50, 0x41, 0x52, 0x4b, 0x46, 0x3e, - 0x44, 0x44, 0x46, 0x4e, 0x47, 0x48, 0x3e, 0x38, 0x4c, 
0x4c, 0x48, 0x43, - 0x48, 0x3e, 0x50, 0x42, 0x51, 0x50, 0x4a, 0x48, 0x4a, 0x42, 0x44, 0x3d, - 0x4a, 0x46, 0x46, 0x3d, 0x4e, 0x47, 0x3d, 0x48, 0x4c, 0x46, 0x50, 0x4d, - 0x49, 0x45, 0x4a, 0x4c, 0x4c, 0x47, 0x4a, 0x42, 0x4a, 0x45, 0x50, 0x52, - 0x4b, 0x4d, 0x4c, 0x43, 0x42, 0x53, 0x41, 0x45, 0x49, 0x41, 0x4b, 0x4c, - 0x52, 0x54, 0x4b, 0x41, 0x48, 0x4c, 0x47, 0x4c, 0x41, 0x49, 0x4a, 0x47, - 0x50, 0x59, 0x4e, 0x45, 0x3c, 0x5d, 0x53, 0x4c, 0x5a, 0x3e, 0x3a, 0x51, - 0x3a, 0x22, 0x35, 0x59, 0x40, 0x5a, 0x43, 0x46, 0x41, 0x32, 0x44, 0x4b, - 0x47, 0x04, 0x4c, 0x3a, 0x4a, 0x49, 0x48, 0x3d, 0x45, 0x2b, 0x50, 0x41, - 0x3e, 0x44, 0x4f, 0x43, 0x4a, 0x3f, 0x48, 0x4b, 0x53, 0x49, 0x4b, 0x38, - 0x44, 0x40, 0x48, 0x4c, 0x41, 0x3f, 0x47, 0x3e, 0x47, 0x49, 0x45, 0x42, - 0x43, 0x3e, 0x46, 0x44, 0x53, 0x4d, 0x48, 0x44, 0x45, 0x42, 0x43, 0x53, - 0x55, 0x49, 0x4d, 0x4b, 0x45, 0x44, 0x47, 0x5f, 0x48, 0x44, 0x4a, 0x48, - 0x45, 0x4d, 0x4f, 0x5e, 0x4e, 0x46, 0x49, 0x49, 0x4d, 0x49, 0x44, 0x48, - 0x4d, 0x41, 0x50, 0x48, 0x3d, 0x3f, 0x4d, 0x38, 0x46, 0x4a, 0x50, 0x4a, - 0x45, 0x3e, 0x43, 0x36, 0x42, 0x48, 0x53, 0x54, 0x49, 0x43, 0x4b, 0x3a, - 0x45, 0x48, 0x50, 0x45, 0x4a, 0x4c, 0x4a, 0x4d, 0x43, 0x4c, 0x55, 0x4e, - 0x4c, 0x42, 0x45, 0x52, 0x52, 0x45, 0x46, 0x40, 0x54, 0x4c, 0x3d, 0x4e, - 0x49, 0x4e, 0x44, 0x47, 0x45, 0x48, 0x4b, 0x50, 0x49, 0x4b, 0x44, 0x4b, - 0x4f, 0x49, 0x47, 0x47, 0x53, 0x3f, 0x4b, 0x42, 0x45, 0x3e, 0x4d, 0x4d, - 0x48, 0x51, 0x45, 0x40, 0x43, 0x43, 0x4e, 0x44, 0x51, 0x55, 0x4a, 0x3e, - 0x45, 0x55, 0x58, 0x50, 0x50, 0x38, 0x44, 0x4f, 0x3b, 0x23, 0x3c, 0x55, - 0x3c, 0x54, 0x49, 0x42, 0x44, 0x2f, 0x3e, 0x47, 0x42, 0x01, 0x42, 0x37, - 0x3f, 0x42, 0x45, 0x45, 0x47, 0x2a, 0x52, 0x4b, 0x45, 0x3c, 0x47, 0x44, - 0x44, 0x40, 0x50, 0x53, 0x48, 0x42, 0x4d, 0x36, 0x50, 0x3d, 0x49, 0x44, - 0x4f, 0x4c, 0x4a, 0x42, 0x4d, 0x3e, 0x3d, 0x3f, 0x4e, 0x44, 0x4d, 0x4e, - 0x54, 0x3d, 0x42, 0x46, 0x49, 0x47, 0x4b, 0x53, 0x45, 0x46, 0x47, 0x4a, - 0x45, 0x3d, 0x4a, 0x5f, 0x51, 0x3e, 0x45, 0x45, 0x44, 
0x3a, 0x4d, 0x57, - 0x45, 0x47, 0x4d, 0x45, 0x4e, 0x4b, 0x51, 0x48, 0x4b, 0x4a, 0x3c, 0x4e, - 0x51, 0x41, 0x4d, 0x36, 0x47, 0x4a, 0x46, 0x51, 0x4e, 0x4c, 0x52, 0x41, - 0x55, 0x47, 0x41, 0x47, 0x4d, 0x47, 0x4b, 0x3d, 0x4a, 0x4a, 0x46, 0x49, - 0x4d, 0x48, 0x46, 0x46, 0x4d, 0x52, 0x52, 0x48, 0x49, 0x3f, 0x4b, 0x4e, - 0x4c, 0x49, 0x45, 0x47, 0x41, 0x4b, 0x44, 0x48, 0x52, 0x4b, 0x53, 0x44, - 0x46, 0x4e, 0x44, 0x49, 0x52, 0x50, 0x46, 0x4b, 0x44, 0x43, 0x50, 0x49, - 0x4a, 0x53, 0x45, 0x49, 0x52, 0x3f, 0x4a, 0x4e, 0x49, 0x4c, 0x4d, 0x4d, - 0x40, 0x40, 0x3f, 0x4a, 0x47, 0x56, 0x51, 0x43, 0x40, 0x5a, 0x58, 0x52, - 0x4f, 0x3d, 0x3d, 0x45, 0x38, 0x29, 0x33, 0x59, 0x45, 0x54, 0x3c, 0x42, - 0x3f, 0x27, 0x3e, 0x49, 0x48, 0x06, 0x4a, 0x3f, 0x41, 0x49, 0x4c, 0x48, - 0x46, 0x2b, 0x4a, 0x4f, 0x44, 0x46, 0x4c, 0x46, 0x4a, 0x3b, 0x4d, 0x4a, - 0x40, 0x41, 0x45, 0x38, 0x51, 0x39, 0x46, 0x46, 0x41, 0x51, 0x4e, 0x41, - 0x49, 0x44, 0x48, 0x4a, 0x4b, 0x46, 0x47, 0x46, 0x4a, 0x4c, 0x47, 0x48, - 0x3d, 0x42, 0x50, 0x4f, 0x50, 0x4a, 0x4a, 0x48, 0x4a, 0x45, 0x45, 0x61, - 0x4a, 0x4c, 0x49, 0x3d, 0x4b, 0x4a, 0x4a, 0x5a, 0x48, 0x49, 0x50, 0x4f, - 0x42, 0x48, 0x3e, 0x44, 0x43, 0x3b, 0x4f, 0x54, 0x4b, 0x4a, 0x47, 0x31, - 0x4a, 0x49, 0x47, 0x4e, 0x48, 0x48, 0x46, 0x42, 0x4a, 0x45, 0x4c, 0x49, - 0x4b, 0x4e, 0x53, 0x43, 0x4c, 0x49, 0x4f, 0x4b, 0x46, 0x4c, 0x4b, 0x4e, - 0x51, 0x4b, 0x49, 0x52, 0x44, 0x55, 0x45, 0x49, 0x4b, 0x4a, 0x50, 0x4c, - 0x4d, 0x4a, 0x4b, 0x48, 0x41, 0x46, 0x47, 0x43, 0x4b, 0x3f, 0x54, 0x4a, - 0x46, 0x49, 0x51, 0x48, 0x4e, 0x4a, 0x41, 0x52, 0x52, 0x4e, 0x53, 0x47, - 0x42, 0x48, 0x43, 0x44, 0x54, 0x51, 0x40, 0x49, 0x4c, 0x48, 0x49, 0x44, - 0x4c, 0x56, 0x52, 0x49, 0x3d, 0x59, 0x4f, 0x56, 0x56, 0x42, 0x46, 0x45, - 0x3e, 0x28, 0x3f, 0x5b, 0x3f, 0x5a, 0x4c, 0x42, 0x44, 0x22, 0x3f, 0x46, - 0x47, 0x0d, 0x3e, 0x41, 0x45, 0x49, 0x4a, 0x3b, 0x45, 0x2d, 0x4d, 0x4a, - 0x44, 0x43, 0x49, 0x46, 0x4b, 0x47, 0x49, 0x45, 0x4e, 0x40, 0x4c, 0x3c, - 0x42, 0x3e, 0x4b, 0x50, 0x48, 0x49, 0x4c, 0x42, 0x3c, 
0x43, 0x50, 0x43, - 0x49, 0x4e, 0x4e, 0x43, 0x46, 0x4c, 0x48, 0x4a, 0x43, 0x4c, 0x49, 0x4e, - 0x47, 0x44, 0x50, 0x4c, 0x4a, 0x48, 0x47, 0x5f, 0x3f, 0x3e, 0x48, 0x4f, - 0x4f, 0x49, 0x4a, 0x5f, 0x4e, 0x40, 0x4e, 0x48, 0x47, 0x44, 0x40, 0x4d, - 0x3f, 0x4a, 0x53, 0x45, 0x3e, 0x50, 0x3f, 0x39, 0x50, 0x45, 0x45, 0x4b, - 0x43, 0x41, 0x46, 0x41, 0x49, 0x47, 0x4b, 0x41, 0x3c, 0x4b, 0x46, 0x3f, - 0x41, 0x4a, 0x4e, 0x4c, 0x49, 0x4c, 0x3f, 0x44, 0x53, 0x4c, 0x45, 0x49, - 0x48, 0x4d, 0x48, 0x4a, 0x48, 0x4f, 0x45, 0x4d, 0x48, 0x4c, 0x41, 0x49, - 0x42, 0x48, 0x53, 0x46, 0x4a, 0x46, 0x4b, 0x4f, 0x4c, 0x52, 0x4c, 0x51, - 0x41, 0x4d, 0x49, 0x41, 0x49, 0x4f, 0x49, 0x42, 0x4a, 0x48, 0x51, 0x4a, - 0x44, 0x4d, 0x55, 0x48, 0x47, 0x4d, 0x4d, 0x45, 0x42, 0x60, 0x4a, 0x51, - 0x42, 0x54, 0x56, 0x56, 0x50, 0x4a, 0x3f, 0x4a, 0x40, 0x25, 0x3a, 0x59, - 0x46, 0x58, 0x52, 0x46, 0x41, 0x28, 0x3d, 0x3e, 0x45, 0x13, 0x47, 0x41, - 0x3d, 0x44, 0x48, 0x45, 0x49, 0x26, 0x46, 0x4c, 0x3b, 0x4a, 0x42, 0x47, - 0x46, 0x41, 0x44, 0x52, 0x50, 0x4a, 0x4f, 0x40, 0x4b, 0x39, 0x42, 0x45, - 0x4a, 0x4d, 0x4f, 0x3f, 0x42, 0x4f, 0x49, 0x45, 0x42, 0x4a, 0x46, 0x47, - 0x48, 0x40, 0x4a, 0x46, 0x41, 0x3b, 0x48, 0x55, 0x4b, 0x4e, 0x4e, 0x48, - 0x4b, 0x44, 0x46, 0x53, 0x48, 0x45, 0x4b, 0x53, 0x49, 0x43, 0x4a, 0x5c, - 0x46, 0x45, 0x45, 0x49, 0x49, 0x49, 0x4c, 0x43, 0x4e, 0x4a, 0x41, 0x4a, - 0x42, 0x43, 0x4a, 0x38, 0x44, 0x4a, 0x4b, 0x3f, 0x45, 0x49, 0x45, 0x38, - 0x43, 0x40, 0x45, 0x4c, 0x47, 0x42, 0x3f, 0x42, 0x3e, 0x4a, 0x43, 0x50, - 0x4a, 0x4e, 0x4f, 0x47, 0x4d, 0x49, 0x49, 0x47, 0x4a, 0x4d, 0x46, 0x4c, - 0x4f, 0x3d, 0x52, 0x4a, 0x41, 0x44, 0x4b, 0x50, 0x4c, 0x52, 0x49, 0x50, - 0x4b, 0x45, 0x49, 0x4d, 0x48, 0x55, 0x50, 0x47, 0x4e, 0x50, 0x4f, 0x48, - 0x46, 0x4d, 0x4d, 0x41, 0x48, 0x51, 0x4b, 0x4c, 0x47, 0x51, 0x42, 0x42, - 0x4d, 0x47, 0x43, 0x4c, 0x4c, 0x5a, 0x4e, 0x47, 0x3b, 0x59, 0x51, 0x57, - 0x4c, 0x40, 0x46, 0x4c, 0x37, 0x2a, 0x35, 0x58, 0x44, 0x5b, 0x4c, 0x44, - 0x3e, 0x2e, 0x3f, 0x43, 0x46, 0x23, 0x49, 0x3e, 0x41, 
0x3f, 0x4b, 0x3e, - 0x4e, 0x2f, 0x4d, 0x4a, 0x4e, 0x40, 0x4e, 0x41, 0x40, 0x3f, 0x4a, 0x42, - 0x4d, 0x4c, 0x44, 0x47, 0x4e, 0x44, 0x40, 0x43, 0x4d, 0x49, 0x4f, 0x3d, - 0x49, 0x3f, 0x51, 0x48, 0x42, 0x4a, 0x49, 0x47, 0x49, 0x46, 0x4a, 0x45, - 0x45, 0x49, 0x53, 0x4d, 0x4c, 0x4e, 0x44, 0x50, 0x4b, 0x43, 0x4e, 0x5f, - 0x3c, 0x40, 0x44, 0x46, 0x48, 0x4b, 0x42, 0x62, 0x4e, 0x50, 0x4c, 0x49, - 0x4a, 0x4f, 0x44, 0x53, 0x42, 0x43, 0x49, 0x48, 0x4b, 0x3c, 0x4a, 0x37, - 0x4c, 0x41, 0x49, 0x46, 0x46, 0x47, 0x43, 0x40, 0x4d, 0x4d, 0x4a, 0x48, - 0x50, 0x4b, 0x50, 0x41, 0x44, 0x3e, 0x51, 0x47, 0x44, 0x4a, 0x44, 0x45, - 0x48, 0x4d, 0x52, 0x4e, 0x44, 0x48, 0x4d, 0x43, 0x42, 0x45, 0x48, 0x52, - 0x44, 0x42, 0x50, 0x42, 0x4d, 0x45, 0x48, 0x4d, 0x4f, 0x4e, 0x45, 0x49, - 0x51, 0x48, 0x4f, 0x53, 0x4d, 0x4c, 0x48, 0x50, 0x4e, 0x4d, 0x50, 0x48, - 0x49, 0x42, 0x4c, 0x42, 0x4b, 0x4b, 0x49, 0x48, 0x48, 0x49, 0x4a, 0x54, - 0x44, 0x57, 0x4d, 0x4b, 0x3f, 0x56, 0x53, 0x5c, 0x50, 0x4e, 0x46, 0x49, - 0x40, 0x24, 0x44, 0x58, 0x49, 0x54, 0x48, 0x49, 0x41, 0x22, 0x44, 0x3f, - 0x48, 0x1c, 0x4d, 0x39, 0x3e, 0x4c, 0x3d, 0x4a, 0x48, 0x2d, 0x48, 0x3e, - 0x3f, 0x3a, 0x46, 0x4e, 0x44, 0x43, 0x49, 0x51, 0x4d, 0x3c, 0x44, 0x41, - 0x4e, 0x44, 0x42, 0x4c, 0x45, 0x48, 0x45, 0x46, 0x42, 0x46, 0x47, 0x42, - 0x4f, 0x45, 0x47, 0x44, 0x48, 0x47, 0x4a, 0x42, 0x4d, 0x48, 0x3e, 0x53, - 0x47, 0x4b, 0x44, 0x4b, 0x45, 0x4a, 0x50, 0x55, 0x4c, 0x45, 0x48, 0x43, - 0x53, 0x3d, 0x4e, 0x5f, 0x42, 0x44, 0x4a, 0x4f, 0x3f, 0x48, 0x4e, 0x4b, - 0x43, 0x48, 0x43, 0x41, 0x4a, 0x4b, 0x51, 0x39, 0x52, 0x46, 0x44, 0x49, - 0x48, 0x45, 0x4c, 0x40, 0x45, 0x49, 0x51, 0x48, 0x45, 0x42, 0x45, 0x48, - 0x40, 0x43, 0x3d, 0x47, 0x53, 0x54, 0x4d, 0x4a, 0x4a, 0x47, 0x48, 0x43, - 0x4c, 0x46, 0x43, 0x4f, 0x49, 0x4c, 0x3f, 0x3d, 0x4b, 0x41, 0x40, 0x48, - 0x4e, 0x4c, 0x4b, 0x40, 0x4c, 0x43, 0x49, 0x4d, 0x47, 0x4f, 0x47, 0x42, - 0x47, 0x4a, 0x4d, 0x4f, 0x46, 0x4d, 0x51, 0x49, 0x48, 0x4d, 0x4e, 0x46, - 0x47, 0x41, 0x44, 0x4d, 0x4b, 0x55, 0x4b, 0x4c, 0x41, 
0x5e, 0x50, 0x45, - 0x40, 0x55, 0x4b, 0x60, 0x55, 0x47, 0x3d, 0x4a, 0x42, 0x22, 0x46, 0x5a, - 0x47, 0x53, 0x49, 0x44, 0x44, 0x27, 0x41, 0x4f, 0x3e, 0x22, 0x4a, 0x44, - 0x49, 0x3e, 0x4e, 0x4d, 0x3f, 0x3a, 0x4c, 0x44, 0x4a, 0x44, 0x46, 0x51, - 0x4f, 0x42, 0x4c, 0x4e, 0x39, 0x4b, 0x42, 0x39, 0x4b, 0x3e, 0x4f, 0x47, - 0x4a, 0x4f, 0x3f, 0x4d, 0x43, 0x4c, 0x4a, 0x4b, 0x4b, 0x3d, 0x51, 0x46, - 0x49, 0x4c, 0x47, 0x44, 0x43, 0x3d, 0x3c, 0x54, 0x4a, 0x47, 0x4d, 0x50, - 0x4a, 0x46, 0x51, 0x62, 0x46, 0x4d, 0x4b, 0x46, 0x49, 0x3c, 0x50, 0x57, - 0x47, 0x40, 0x3e, 0x4c, 0x4b, 0x3f, 0x55, 0x46, 0x3d, 0x45, 0x42, 0x4e, - 0x50, 0x49, 0x46, 0x3a, 0x4c, 0x47, 0x4a, 0x49, 0x42, 0x42, 0x4a, 0x44, - 0x42, 0x40, 0x49, 0x54, 0x46, 0x4b, 0x47, 0x45, 0x51, 0x47, 0x41, 0x42, - 0x49, 0x50, 0x4e, 0x48, 0x4b, 0x4b, 0x47, 0x4a, 0x47, 0x49, 0x4b, 0x45, - 0x4b, 0x54, 0x48, 0x54, 0x4b, 0x49, 0x51, 0x4a, 0x4a, 0x40, 0x46, 0x42, - 0x44, 0x44, 0x4d, 0x4b, 0x47, 0x43, 0x45, 0x41, 0x3e, 0x49, 0x43, 0x51, - 0x3e, 0x4b, 0x52, 0x46, 0x48, 0x3f, 0x4e, 0x51, 0x51, 0x49, 0x3f, 0x48, - 0x4c, 0x4c, 0x52, 0x47, 0x43, 0x57, 0x44, 0x42, 0x40, 0x52, 0x50, 0x5d, - 0x4f, 0x40, 0x42, 0x45, 0x46, 0x26, 0x3c, 0x51, 0x4b, 0x4e, 0x4b, 0x49, - 0x46, 0x35, 0x49, 0x53, 0x49, 0x2b, 0x4d, 0x3e, 0x50, 0x44, 0x4f, 0x54, - 0x46, 0x34, 0x49, 0x4d, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x44, 0x52, 0x41, - 0x4d, 0x4c, 0x52, 0x41, 0x49, 0x3a, 0x4e, 0x49, 0x40, 0x4b, 0x45, 0x4d, - 0x4b, 0x4a, 0x47, 0x49, 0x45, 0x49, 0x4d, 0x50, 0x3e, 0x47, 0x44, 0x51, - 0x4c, 0x41, 0x45, 0x50, 0x47, 0x41, 0x4a, 0x52, 0x4b, 0x3d, 0x4b, 0x5b, - 0x4c, 0x4c, 0x4d, 0x3f, 0x47, 0x44, 0x49, 0x5d, 0x4a, 0x53, 0x44, 0x45, - 0x45, 0x46, 0x3d, 0x4f, 0x50, 0x3b, 0x44, 0x4e, 0x40, 0x41, 0x4c, 0x3a, - 0x4a, 0x45, 0x49, 0x48, 0x45, 0x4a, 0x45, 0x36, 0x45, 0x4d, 0x4c, 0x49, - 0x3f, 0x47, 0x4d, 0x40, 0x53, 0x48, 0x49, 0x4c, 0x47, 0x4f, 0x42, 0x44, - 0x45, 0x40, 0x4a, 0x4c, 0x49, 0x4f, 0x4b, 0x4d, 0x42, 0x45, 0x3e, 0x4a, - 0x48, 0x4a, 0x49, 0x50, 0x4c, 0x53, 0x50, 0x45, 0x4b, 
0x4c, 0x46, 0x4f, - 0x44, 0x43, 0x54, 0x50, 0x3f, 0x48, 0x42, 0x4b, 0x43, 0x3f, 0x4d, 0x4c, - 0x43, 0x49, 0x4a, 0x47, 0x54, 0x4b, 0x4f, 0x4d, 0x44, 0x47, 0x49, 0x4e, - 0x4e, 0x55, 0x40, 0x46, 0x44, 0x56, 0x4e, 0x65, 0x4f, 0x3f, 0x43, 0x48, - 0x39, 0x27, 0x43, 0x55, 0x4b, 0x4c, 0x44, 0x46, 0x42, 0x34, 0x44, 0x52, - 0x43, 0x22, 0x4e, 0x41, 0x49, 0x48, 0x49, 0x51, 0x3b, 0x37, 0x4b, 0x40, - 0x4f, 0x45, 0x53, 0x4c, 0x47, 0x46, 0x47, 0x4c, 0x3e, 0x44, 0x45, 0x49, - 0x48, 0x50, 0x45, 0x40, 0x46, 0x4c, 0x47, 0x4d, 0x44, 0x48, 0x49, 0x50, - 0x4f, 0x4a, 0x46, 0x55, 0x4e, 0x42, 0x4c, 0x4c, 0x50, 0x48, 0x3d, 0x55, - 0x46, 0x3e, 0x4a, 0x4b, 0x4f, 0x46, 0x46, 0x60, 0x50, 0x3f, 0x55, 0x40, - 0x42, 0x44, 0x48, 0x63, 0x50, 0x3d, 0x45, 0x4f, 0x4e, 0x41, 0x47, 0x48, - 0x4a, 0x3c, 0x3d, 0x46, 0x3f, 0x42, 0x43, 0x37, 0x4f, 0x4f, 0x50, 0x47, - 0x47, 0x4b, 0x52, 0x40, 0x3f, 0x44, 0x4a, 0x40, 0x4d, 0x44, 0x4e, 0x37, - 0x43, 0x48, 0x47, 0x3f, 0x51, 0x4d, 0x45, 0x42, 0x41, 0x46, 0x3d, 0x53, - 0x4f, 0x4b, 0x54, 0x45, 0x51, 0x40, 0x4a, 0x4a, 0x48, 0x4f, 0x43, 0x4a, - 0x4f, 0x4c, 0x4c, 0x4f, 0x48, 0x4c, 0x44, 0x4e, 0x43, 0x46, 0x4f, 0x4a, - 0x43, 0x41, 0x49, 0x49, 0x47, 0x53, 0x45, 0x49, 0x4e, 0x46, 0x4c, 0x4e, - 0x3c, 0x49, 0x44, 0x45, 0x4c, 0x42, 0x49, 0x41, 0x48, 0x58, 0x54, 0x4d, - 0x35, 0x52, 0x4e, 0x5b, 0x4f, 0x40, 0x3e, 0x46, 0x46, 0x36, 0x3d, 0x60, - 0x4d, 0x49, 0x4a, 0x43, 0x44, 0x36, 0x49, 0x67, 0x4a, 0x2d, 0x4b, 0x40, - 0x3f, 0x49, 0x43, 0x5f, 0x45, 0x3c, 0x49, 0x4c, 0x4a, 0x43, 0x48, 0x55, - 0x49, 0x46, 0x49, 0x46, 0x44, 0x4e, 0x42, 0x4e, 0x40, 0x45, 0x42, 0x52, - 0x4a, 0x40, 0x4a, 0x44, 0x40, 0x45, 0x54, 0x3d, 0x4c, 0x3e, 0x4c, 0x55, - 0x4d, 0x45, 0x4d, 0x51, 0x4a, 0x4b, 0x44, 0x5b, 0x48, 0x3d, 0x3e, 0x46, - 0x4f, 0x4d, 0x3f, 0x62, 0x4d, 0x45, 0x3f, 0x47, 0x47, 0x47, 0x44, 0x5b, - 0x4b, 0x4f, 0x51, 0x4c, 0x4a, 0x47, 0x48, 0x5b, 0x47, 0x40, 0x4a, 0x47, - 0x42, 0x44, 0x46, 0x46, 0x45, 0x48, 0x4a, 0x3f, 0x40, 0x4f, 0x48, 0x3a, - 0x49, 0x52, 0x4a, 0x53, 0x43, 0x4c, 0x4b, 0x4a, 0x4a, 
0x4a, 0x4e, 0x42, - 0x4b, 0x46, 0x3d, 0x50, 0x51, 0x4b, 0x4b, 0x4f, 0x50, 0x4c, 0x4f, 0x4c, - 0x4d, 0x41, 0x41, 0x3c, 0x40, 0x43, 0x54, 0x51, 0x48, 0x3d, 0x48, 0x51, - 0x42, 0x42, 0x4c, 0x4e, 0x4d, 0x4b, 0x49, 0x43, 0x48, 0x47, 0x4b, 0x49, - 0x49, 0x4e, 0x4d, 0x46, 0x4c, 0x52, 0x49, 0x49, 0x51, 0x4e, 0x45, 0x47, - 0x44, 0x47, 0x42, 0x4a, 0x46, 0x59, 0x48, 0x48, 0x4b, 0x4f, 0x4c, 0x5e, - 0x5c, 0x45, 0x3f, 0x48, 0x3d, 0x3f, 0x37, 0x5a, 0x4b, 0x4b, 0x45, 0x49, - 0x3e, 0x42, 0x41, 0x6b, 0x49, 0x2d, 0x45, 0x43, 0x47, 0x45, 0x49, 0x61, - 0x3d, 0x3b, 0x49, 0x43, 0x49, 0x4b, 0x4b, 0x55, 0x4b, 0x47, 0x46, 0x46, - 0x48, 0x4d, 0x49, 0x4f, 0x4a, 0x4c, 0x42, 0x51, 0x41, 0x44, 0x45, 0x4f, - 0x4e, 0x44, 0x3f, 0x55, 0x3e, 0x4a, 0x45, 0x50, 0x46, 0x42, 0x41, 0x49, - 0x49, 0x47, 0x49, 0x61, 0x47, 0x40, 0x41, 0x4e, 0x4d, 0x4b, 0x4a, 0x5e, - 0x52, 0x49, 0x4b, 0x52, 0x51, 0x55, 0x42, 0x61, 0x53, 0x4c, 0x48, 0x4a, - 0x4e, 0x48, 0x48, 0x57, 0x4c, 0x40, 0x40, 0x48, 0x45, 0x43, 0x3e, 0x46, - 0x43, 0x4a, 0x45, 0x45, 0x44, 0x4f, 0x44, 0x40, 0x49, 0x48, 0x4e, 0x49, - 0x4a, 0x4e, 0x49, 0x51, 0x46, 0x4f, 0x47, 0x44, 0x42, 0x4d, 0x43, 0x4e, - 0x4f, 0x4d, 0x44, 0x51, 0x47, 0x49, 0x40, 0x57, 0x4b, 0x49, 0x47, 0x4c, - 0x4d, 0x4d, 0x3e, 0x47, 0x45, 0x41, 0x50, 0x4b, 0x4b, 0x45, 0x42, 0x4e, - 0x48, 0x47, 0x4e, 0x4b, 0x56, 0x4c, 0x4f, 0x52, 0x51, 0x49, 0x4d, 0x4a, - 0x4b, 0x52, 0x4d, 0x55, 0x4b, 0x4e, 0x4e, 0x4b, 0x51, 0x57, 0x47, 0x42, - 0x49, 0x48, 0x56, 0x44, 0x52, 0x56, 0x53, 0x5a, 0x63, 0x53, 0x4c, 0x4c, - 0x43, 0x56, 0x3c, 0x57, 0x47, 0x47, 0x4d, 0x52, 0x43, 0x48, 0x45, 0x5f, - 0x45, 0x29, 0x47, 0x45, 0x48, 0x40, 0x41, 0x4b, 0x3f, 0x39, 0x49, 0x4e, - 0x47, 0x55, 0x42, 0x56, 0x4d, 0x43, 0x48, 0x44, 0x45, 0x53, 0x43, 0x46, - 0x49, 0x43, 0x49, 0x4a, 0x40, 0x4e, 0x4a, 0x4a, 0x47, 0x43, 0x45, 0x4d, - 0x4a, 0x47, 0x3f, 0x53, 0x45, 0x43, 0x4b, 0x4c, 0x42, 0x47, 0x47, 0x5f, - 0x48, 0x48, 0x46, 0x44, 0x50, 0x47, 0x41, 0x64, 0x4e, 0x46, 0x49, 0x4a, - 0x4d, 0x55, 0x42, 0x55, 0x46, 0x3d, 0x49, 0x43, 0x52, 
0x52, 0x47, 0x52, - 0x4e, 0x46, 0x47, 0x41, 0x49, 0x4d, 0x50, 0x47, 0x42, 0x49, 0x41, 0x42, - 0x4b, 0x48, 0x49, 0x42, 0x4d, 0x48, 0x51, 0x54, 0x43, 0x56, 0x4c, 0x52, - 0x53, 0x4d, 0x54, 0x4a, 0x51, 0x50, 0x48, 0x4c, 0x4e, 0x48, 0x4c, 0x4c, - 0x52, 0x49, 0x4a, 0x4e, 0x4e, 0x41, 0x4f, 0x53, 0x49, 0x52, 0x42, 0x4b, - 0x50, 0x46, 0x50, 0x4a, 0x53, 0x56, 0x46, 0x4f, 0x4b, 0x49, 0x3d, 0x41, - 0x4c, 0x52, 0x42, 0x50, 0x4d, 0x45, 0x4e, 0x51, 0x4b, 0x4c, 0x46, 0x42, - 0x41, 0x4b, 0x40, 0x4a, 0x42, 0x57, 0x4f, 0x43, 0x40, 0x50, 0x4c, 0x51, - 0x4f, 0x48, 0x3a, 0x4e, 0x51, 0x40, 0x49, 0x66, 0x4b, 0x42, 0x48, 0x3c, - 0x5b, 0x47, 0x53, 0x40, 0x4a, 0x48, 0x35, 0x44, 0x5f, 0x50, 0x4a, 0x3c, - 0x41, 0x45, 0x48, 0x3b, 0x42, 0x59, 0x43, 0x4b, 0x48, 0x49, 0x4a, 0x40, - 0x4f, 0x5c, 0x50, 0x54, 0x53, 0x55, 0x4c, 0x4a, 0x43, 0x46, 0x49, 0x47, - 0x49, 0x48, 0x4b, 0x43, 0x42, 0x44, 0x42, 0x46, 0x44, 0x3f, 0x4b, 0x42, - 0x4d, 0x49, 0x41, 0x46, 0x47, 0x51, 0x51, 0x44, 0x4c, 0x54, 0x4e, 0x4b, - 0x42, 0x52, 0x4e, 0x4c, 0x4b, 0x4a, 0x50, 0x4e, 0x44, 0x4b, 0x4e, 0x4e, - 0x4f, 0x42, 0x4b, 0x48, 0x46, 0x43, 0x48, 0x54, 0x4b, 0x4e, 0x48, 0x4f, - 0x4a, 0x4d, 0x43, 0x4e, 0x47, 0x50, 0x4a, 0x44, 0x47, 0x52, 0x46, 0x53, - 0x4a, 0x40, 0x46, 0x54, 0x50, 0x4a, 0x47, 0x51, 0x49, 0x45, 0x4b, 0x4e, - 0x4b, 0x46, 0x4c, 0x4c, 0x52, 0x47, 0x45, 0x45, 0x4a, 0x47, 0x4c, 0x52, - 0x44, 0x51, 0x47, 0x42, 0x47, 0x43, 0x43, 0x49, 0x52, 0x5a, 0x55, 0x3e, - 0x45, 0x4b, 0x4c, 0x46, 0x4f, 0x4b, 0x45, 0x49, 0x4a, 0x4e, 0x4a, 0x50, - 0x3e, 0x4e, 0x42, 0x4e, 0x44, 0x55, 0x3d, 0x4a, 0x4d, 0x49, 0x4d, 0x42, - 0x49, 0x4e, 0x50, 0x44, 0x4b, 0x3c, 0x41, 0x49, 0x51, 0x49, 0x3c, 0x4e, - 0x4c, 0x39, 0x4c, 0x72, 0x44, 0x4b, 0x49, 0x42, 0x5f, 0x48, 0x4a, 0x48, - 0x41, 0x4c, 0x43, 0x40, 0x62, 0x5e, 0x47, 0x3c, 0x4a, 0x4c, 0x55, 0x49, - 0x4b, 0x52, 0x4e, 0x4b, 0x4d, 0x48, 0x4c, 0x3c, 0x3f, 0x4f, 0x4e, 0x48, - 0x45, 0x55, 0x4a, 0x46, 0x48, 0x3d, 0x45, 0x44, 0x4b, 0x4a, 0x46, 0x3a, - 0x4e, 0x44, 0x4d, 0x49, 0x49, 0x49, 0x40, 0x3e, 0x40, 
0x47, 0x48, 0x43, - 0x3f, 0x51, 0x46, 0x4c, 0x45, 0x4c, 0x49, 0x44, 0x3e, 0x57, 0x49, 0x4e, - 0x48, 0x3f, 0x48, 0x47, 0x53, 0x4d, 0x50, 0x51, 0x49, 0x42, 0x45, 0x44, - 0x49, 0x49, 0x46, 0x4b, 0x45, 0x49, 0x4f, 0x49, 0x46, 0x48, 0x4c, 0x55, - 0x46, 0x51, 0x48, 0x4a, 0x48, 0x54, 0x4b, 0x5a, 0x4c, 0x47, 0x40, 0x47, - 0x40, 0x55, 0x50, 0x52, 0x4a, 0x4b, 0x4f, 0x49, 0x4b, 0x50, 0x4b, 0x5b, - 0x51, 0x53, 0x4f, 0x4e, 0x49, 0x48, 0x44, 0x52, 0x46, 0x4e, 0x47, 0x48, - 0x44, 0x43, 0x49, 0x55, 0x48, 0x58, 0x4f, 0x46, 0x45, 0x53, 0x45, 0x4a, - 0x4c, 0x4c, 0x49, 0x46, 0x47, 0x4d, 0x41, 0x4d, 0x4f, 0x59, 0x4a, 0x49, - 0x46, 0x4e, 0x44, 0x49, 0x4d, 0x48, 0x54, 0x47, 0x48, 0x4e, 0x48, 0x43, - 0x46, 0x41, 0x46, 0x44, 0x52, 0x46, 0x42, 0x4c, 0x4c, 0x31, 0x4d, 0x6f, - 0x51, 0x4f, 0x4d, 0x43, 0x5c, 0x48, 0x49, 0x49, 0x46, 0x4c, 0x43, 0x3b, - 0x5d, 0x63, 0x58, 0x46, 0x49, 0x45, 0x4e, 0x48, 0x49, 0x5d, 0x45, 0x50, - 0x56, 0x4d, 0x57, 0x37, 0x40, 0x55, 0x43, 0x4b, 0x4e, 0x46, 0x4c, 0x3b, - 0x3d, 0x4b, 0x49, 0x4b, 0x52, 0x47, 0x4d, 0x34, 0x4c, 0x4c, 0x47, 0x4e, - 0x4d, 0x4c, 0x3d, 0x3f, 0x4a, 0x49, 0x44, 0x45, 0x4a, 0x54, 0x43, 0x44, - 0x50, 0x4b, 0x4d, 0x4c, 0x4e, 0x48, 0x46, 0x51, 0x43, 0x48, 0x48, 0x48, - 0x42, 0x44, 0x4e, 0x48, 0x47, 0x45, 0x48, 0x51, 0x53, 0x4a, 0x4f, 0x58, - 0x42, 0x4d, 0x48, 0x4f, 0x4c, 0x45, 0x4a, 0x57, 0x4b, 0x43, 0x4d, 0x4b, - 0x4a, 0x4e, 0x4c, 0x5f, 0x3f, 0x4f, 0x4a, 0x42, 0x4b, 0x48, 0x4d, 0x62, - 0x4f, 0x4b, 0x50, 0x4c, 0x45, 0x49, 0x44, 0x53, 0x4a, 0x4f, 0x45, 0x56, - 0x4b, 0x44, 0x41, 0x53, 0x49, 0x48, 0x4d, 0x49, 0x47, 0x4b, 0x46, 0x4c, - 0x49, 0x4b, 0x4c, 0x54, 0x4f, 0x4b, 0x47, 0x49, 0x44, 0x4a, 0x4e, 0x53, - 0x4f, 0x49, 0x54, 0x4e, 0x4a, 0x48, 0x42, 0x54, 0x51, 0x46, 0x4b, 0x52, - 0x45, 0x48, 0x51, 0x4a, 0x40, 0x4a, 0x50, 0x45, 0x4a, 0x46, 0x49, 0x46, - 0x54, 0x46, 0x42, 0x48, 0x50, 0x36, 0x4a, 0x6b, 0x46, 0x59, 0x51, 0x47, - 0x5f, 0x4d, 0x43, 0x4d, 0x44, 0x4d, 0x42, 0x3b, 0x65, 0x6a, 0x56, 0x48, - 0x4d, 0x4c, 0x52, 0x4a, 0x4d, 0x61, 0x52, 0x4b, 0x47, 
0x4f, 0x48, 0x49, - 0x3f, 0x5b, 0x45, 0x51, 0x48, 0x48, 0x4b, 0x3c, 0x3b, 0x4c, 0x54, 0x52, - 0x4f, 0x51, 0x53, 0x31, 0x47, 0x4c, 0x45, 0x4a, 0x42, 0x4b, 0x47, 0x40, - 0x41, 0x49, 0x4c, 0x46, 0x4b, 0x53, 0x46, 0x49, 0x44, 0x4b, 0x4e, 0x4b, - 0x48, 0x51, 0x49, 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x45, 0x43, 0x46, 0x56, - 0x42, 0x4b, 0x49, 0x4e, 0x4e, 0x53, 0x42, 0x5c, 0x4b, 0x46, 0x49, 0x46, - 0x4e, 0x41, 0x42, 0x67, 0x41, 0x49, 0x4d, 0x48, 0x49, 0x4e, 0x3f, 0x61, - 0x48, 0x4a, 0x40, 0x42, 0x4c, 0x51, 0x50, 0x63, 0x49, 0x44, 0x49, 0x47, - 0x45, 0x4d, 0x49, 0x61, 0x3f, 0x48, 0x40, 0x41, 0x49, 0x49, 0x45, 0x57, - 0x45, 0x46, 0x4d, 0x46, 0x4c, 0x4a, 0x4d, 0x4b, 0x43, 0x54, 0x4b, 0x49, - 0x4c, 0x49, 0x41, 0x49, 0x4b, 0x47, 0x45, 0x4b, 0x44, 0x43, 0x46, 0x3f, - 0x47, 0x47, 0x43, 0x4c, 0x49, 0x4c, 0x3d, 0x4d, 0x4b, 0x54, 0x4a, 0x4f, - 0x44, 0x4c, 0x4b, 0x47, 0x4c, 0x45, 0x3d, 0x52, 0x58, 0x4b, 0x45, 0x4e, - 0x48, 0x39, 0x53, 0x70, 0x4a, 0x5d, 0x4c, 0x4e, 0x5a, 0x4f, 0x46, 0x4b, - 0x3e, 0x4f, 0x44, 0x3d, 0x66, 0x6b, 0x50, 0x4d, 0x4d, 0x57, 0x52, 0x4a, - 0x4c, 0x5b, 0x4e, 0x53, 0x4d, 0x54, 0x50, 0x42, 0x3c, 0x5d, 0x4a, 0x4c, - 0x56, 0x52, 0x50, 0x40, 0x48, 0x4c, 0x4d, 0x49, 0x49, 0x4f, 0x51, 0x38, - 0x42, 0x49, 0x4d, 0x4f, 0x45, 0x40, 0x4d, 0x41, 0x4b, 0x4a, 0x47, 0x51, - 0x4b, 0x53, 0x4c, 0x4a, 0x51, 0x4c, 0x42, 0x56, 0x48, 0x4a, 0x47, 0x58, - 0x49, 0x46, 0x52, 0x4a, 0x45, 0x47, 0x51, 0x54, 0x4f, 0x50, 0x50, 0x53, - 0x49, 0x4a, 0x4d, 0x56, 0x56, 0x4b, 0x4d, 0x45, 0x40, 0x4d, 0x48, 0x60, - 0x4e, 0x56, 0x48, 0x4b, 0x47, 0x45, 0x47, 0x62, 0x4e, 0x4f, 0x41, 0x49, - 0x48, 0x57, 0x44, 0x64, 0x4f, 0x4f, 0x49, 0x44, 0x49, 0x4c, 0x3f, 0x53, - 0x40, 0x41, 0x4e, 0x4b, 0x4d, 0x54, 0x42, 0x53, 0x4e, 0x41, 0x49, 0x44, - 0x41, 0x45, 0x4d, 0x4f, 0x47, 0x51, 0x45, 0x4a, 0x42, 0x45, 0x4e, 0x40, - 0x4b, 0x52, 0x48, 0x47, 0x4e, 0x4f, 0x47, 0x41, 0x48, 0x53, 0x47, 0x47, - 0x46, 0x42, 0x48, 0x4b, 0x42, 0x4c, 0x49, 0x4c, 0x45, 0x4c, 0x54, 0x45, - 0x4c, 0x43, 0x4e, 0x49, 0x56, 0x47, 0x45, 0x4f, 0x4d, 
0x3a, 0x58, 0x74, - 0x49, 0x5b, 0x4c, 0x4f, 0x64, 0x4e, 0x45, 0x43, 0x44, 0x5b, 0x43, 0x41, - 0x63, 0x70, 0x55, 0x45, 0x4a, 0x4a, 0x4d, 0x51, 0x4b, 0x5a, 0x51, 0x57, - 0x54, 0x5b, 0x55, 0x44, 0x38, 0x57, 0x4e, 0x50, 0x4e, 0x56, 0x57, 0x3a, - 0x3a, 0x4b, 0x57, 0x4c, 0x51, 0x53, 0x4d, 0x3b, 0x44, 0x43, 0x47, 0x4c, - 0x48, 0x59, 0x51, 0x41, 0x43, 0x44, 0x51, 0x51, 0x4a, 0x54, 0x51, 0x4b, - 0x4e, 0x45, 0x51, 0x4a, 0x49, 0x4a, 0x4f, 0x52, 0x4c, 0x3e, 0x4e, 0x55, - 0x42, 0x46, 0x46, 0x4a, 0x42, 0x52, 0x49, 0x47, 0x4a, 0x56, 0x4f, 0x50, - 0x46, 0x4f, 0x43, 0x51, 0x53, 0x46, 0x40, 0x60, 0x44, 0x4d, 0x46, 0x54, - 0x3d, 0x49, 0x43, 0x64, 0x45, 0x4d, 0x50, 0x49, 0x4f, 0x4d, 0x53, 0x60, - 0x4a, 0x52, 0x49, 0x47, 0x48, 0x5a, 0x48, 0x58, 0x4e, 0x4f, 0x43, 0x4f, - 0x50, 0x51, 0x41, 0x52, 0x4c, 0x4d, 0x45, 0x42, 0x41, 0x4c, 0x44, 0x54, - 0x4e, 0x4d, 0x4a, 0x47, 0x40, 0x4a, 0x3e, 0x47, 0x4c, 0x58, 0x46, 0x46, - 0x55, 0x4c, 0x4d, 0x45, 0x49, 0x51, 0x53, 0x46, 0x46, 0x43, 0x43, 0x48, - 0x52, 0x3d, 0x4b, 0x4e, 0x49, 0x47, 0x3f, 0x3d, 0x4f, 0x45, 0x44, 0x3f, - 0x5a, 0x43, 0x4b, 0x4d, 0x51, 0x35, 0x54, 0x76, 0x4f, 0x5e, 0x4c, 0x50, - 0x5a, 0x51, 0x46, 0x49, 0x44, 0x61, 0x4f, 0x41, 0x67, 0x72, 0x56, 0x4f, - 0x42, 0x48, 0x4b, 0x52, 0x46, 0x60, 0x50, 0x4e, 0x4a, 0x5b, 0x5f, 0x46, - 0x31, 0x5b, 0x4a, 0x48, 0x4b, 0x58, 0x51, 0x41, 0x37, 0x4e, 0x4f, 0x55, - 0x51, 0x5c, 0x4f, 0x42, 0x4b, 0x4e, 0x4f, 0x54, 0x4f, 0x52, 0x43, 0x43, - 0x48, 0x53, 0x53, 0x41, 0x4b, 0x49, 0x4e, 0x50, 0x46, 0x4c, 0x4f, 0x49, - 0x42, 0x49, 0x4c, 0x4c, 0x4c, 0x41, 0x4e, 0x48, 0x47, 0x4c, 0x49, 0x53, - 0x44, 0x46, 0x51, 0x53, 0x45, 0x52, 0x4e, 0x53, 0x50, 0x58, 0x42, 0x45, - 0x44, 0x42, 0x48, 0x58, 0x4e, 0x4d, 0x54, 0x56, 0x4c, 0x46, 0x4a, 0x58, - 0x48, 0x4f, 0x47, 0x51, 0x47, 0x4f, 0x4f, 0x5b, 0x41, 0x4e, 0x45, 0x45, - 0x4a, 0x50, 0x3e, 0x57, 0x48, 0x4e, 0x41, 0x4c, 0x45, 0x51, 0x46, 0x4c, - 0x46, 0x4f, 0x42, 0x45, 0x4b, 0x4c, 0x49, 0x4c, 0x44, 0x4f, 0x4e, 0x4d, - 0x48, 0x56, 0x43, 0x48, 0x42, 0x54, 0x48, 0x43, 0x3e, 
0x51, 0x43, 0x47, - 0x47, 0x47, 0x49, 0x4d, 0x46, 0x4e, 0x52, 0x42, 0x48, 0x4e, 0x4c, 0x4a, - 0x4d, 0x3e, 0x43, 0x40, 0x48, 0x41, 0x47, 0x4f, 0x5e, 0x49, 0x40, 0x4c, - 0x50, 0x42, 0x56, 0x75, 0x51, 0x5e, 0x51, 0x4e, 0x62, 0x58, 0x49, 0x47, - 0x51, 0x59, 0x46, 0x46, 0x6c, 0x72, 0x55, 0x44, 0x4c, 0x4a, 0x4d, 0x59, - 0x53, 0x64, 0x4d, 0x51, 0x55, 0x5e, 0x59, 0x50, 0x30, 0x58, 0x50, 0x4c, - 0x4c, 0x60, 0x59, 0x42, 0x32, 0x53, 0x50, 0x55, 0x4d, 0x53, 0x59, 0x43, - 0x3e, 0x49, 0x4f, 0x52, 0x4d, 0x51, 0x47, 0x45, 0x4d, 0x4e, 0x53, 0x4e, - 0x54, 0x4f, 0x4d, 0x4d, 0x4e, 0x40, 0x47, 0x53, 0x53, 0x49, 0x56, 0x4d, - 0x4d, 0x3a, 0x4c, 0x4e, 0x45, 0x4a, 0x47, 0x45, 0x53, 0x4a, 0x4e, 0x52, - 0x4d, 0x4e, 0x48, 0x56, 0x4e, 0x4a, 0x4d, 0x52, 0x49, 0x4e, 0x4e, 0x58, - 0x47, 0x50, 0x4c, 0x54, 0x49, 0x42, 0x46, 0x54, 0x50, 0x54, 0x54, 0x46, - 0x40, 0x49, 0x4b, 0x57, 0x4b, 0x59, 0x44, 0x46, 0x52, 0x55, 0x51, 0x55, - 0x4f, 0x50, 0x4d, 0x4d, 0x48, 0x50, 0x4e, 0x49, 0x4e, 0x42, 0x45, 0x3f, - 0x4d, 0x4f, 0x51, 0x47, 0x4a, 0x4c, 0x4b, 0x4b, 0x46, 0x4d, 0x44, 0x52, - 0x4d, 0x44, 0x40, 0x4d, 0x54, 0x46, 0x54, 0x44, 0x4b, 0x46, 0x47, 0x45, - 0x50, 0x45, 0x45, 0x4b, 0x4c, 0x48, 0x3f, 0x55, 0x4a, 0x45, 0x49, 0x4e, - 0x40, 0x49, 0x4a, 0x41, 0x56, 0x4b, 0x49, 0x4e, 0x4a, 0x41, 0x50, 0x70, - 0x56, 0x59, 0x4b, 0x55, 0x58, 0x59, 0x49, 0x47, 0x4a, 0x5a, 0x4c, 0x46, - 0x62, 0x7b, 0x58, 0x51, 0x44, 0x47, 0x44, 0x57, 0x4f, 0x65, 0x4e, 0x50, - 0x4d, 0x67, 0x5c, 0x4a, 0x2b, 0x61, 0x48, 0x4b, 0x4b, 0x5d, 0x5c, 0x48, - 0x39, 0x50, 0x45, 0x4d, 0x53, 0x60, 0x53, 0x46, 0x42, 0x46, 0x50, 0x45, - 0x4f, 0x4e, 0x46, 0x4a, 0x4d, 0x51, 0x54, 0x47, 0x59, 0x4b, 0x58, 0x4a, - 0x50, 0x3d, 0x59, 0x48, 0x45, 0x4e, 0x4e, 0x47, 0x4f, 0x47, 0x4d, 0x4b, - 0x52, 0x42, 0x4c, 0x48, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x4c, 0x4d, 0x51, - 0x49, 0x4f, 0x4c, 0x47, 0x47, 0x48, 0x47, 0x59, 0x4f, 0x4f, 0x53, 0x49, - 0x4e, 0x4b, 0x4f, 0x5a, 0x50, 0x42, 0x47, 0x50, 0x4a, 0x54, 0x47, 0x5a, - 0x43, 0x49, 0x47, 0x4e, 0x49, 0x4d, 0x43, 0x54, 0x4c, 
0x53, 0x4e, 0x4e, - 0x42, 0x43, 0x48, 0x46, 0x4f, 0x43, 0x43, 0x45, 0x51, 0x47, 0x4b, 0x4f, - 0x56, 0x48, 0x48, 0x49, 0x46, 0x45, 0x4d, 0x52, 0x47, 0x4b, 0x46, 0x50, - 0x3e, 0x4e, 0x4c, 0x43, 0x45, 0x4d, 0x53, 0x43, 0x46, 0x45, 0x44, 0x52, - 0x45, 0x49, 0x49, 0x51, 0x3d, 0x4a, 0x4d, 0x46, 0x42, 0x41, 0x4e, 0x48, - 0x5a, 0x49, 0x49, 0x49, 0x4f, 0x3d, 0x56, 0x68, 0x56, 0x67, 0x4b, 0x57, - 0x5f, 0x5c, 0x40, 0x4a, 0x4a, 0x54, 0x4c, 0x47, 0x64, 0x7a, 0x54, 0x48, - 0x46, 0x45, 0x46, 0x57, 0x4e, 0x61, 0x4f, 0x50, 0x4d, 0x64, 0x5b, 0x43, - 0x2d, 0x60, 0x55, 0x51, 0x4c, 0x54, 0x4f, 0x4e, 0x2f, 0x50, 0x4f, 0x52, - 0x50, 0x61, 0x54, 0x4b, 0x3d, 0x4c, 0x47, 0x51, 0x4a, 0x54, 0x4b, 0x42, - 0x3b, 0x55, 0x47, 0x50, 0x4f, 0x49, 0x4a, 0x46, 0x43, 0x44, 0x45, 0x47, - 0x46, 0x4b, 0x4f, 0x46, 0x43, 0x47, 0x4a, 0x4e, 0x51, 0x43, 0x55, 0x47, - 0x4d, 0x46, 0x4c, 0x4c, 0x49, 0x4d, 0x43, 0x51, 0x47, 0x51, 0x52, 0x4a, - 0x46, 0x4f, 0x49, 0x52, 0x50, 0x4a, 0x43, 0x53, 0x46, 0x4e, 0x50, 0x54, - 0x45, 0x3a, 0x4a, 0x4a, 0x4c, 0x50, 0x4b, 0x54, 0x43, 0x4f, 0x4e, 0x45, - 0x49, 0x4f, 0x46, 0x53, 0x4d, 0x51, 0x52, 0x53, 0x3d, 0x4a, 0x47, 0x4e, - 0x43, 0x4a, 0x53, 0x48, 0x4a, 0x4c, 0x4a, 0x4a, 0x42, 0x53, 0x3e, 0x43, - 0x4f, 0x4c, 0x47, 0x48, 0x54, 0x4d, 0x48, 0x48, 0x4e, 0x4c, 0x43, 0x51, - 0x42, 0x49, 0x44, 0x3e, 0x49, 0x51, 0x4a, 0x4d, 0x4f, 0x49, 0x45, 0x44, - 0x4e, 0x41, 0x48, 0x4b, 0x4c, 0x49, 0x46, 0x47, 0x5d, 0x4c, 0x4d, 0x50, - 0x45, 0x40, 0x4e, 0x6a, 0x4f, 0x62, 0x53, 0x50, 0x5c, 0x5e, 0x4a, 0x4c, - 0x50, 0x56, 0x52, 0x42, 0x60, 0x7e, 0x5b, 0x4b, 0x43, 0x41, 0x4c, 0x56, - 0x46, 0x5f, 0x4d, 0x49, 0x43, 0x65, 0x5c, 0x4d, 0x2c, 0x61, 0x48, 0x4c, - 0x44, 0x55, 0x5c, 0x49, 0x37, 0x54, 0x4e, 0x57, 0x52, 0x5c, 0x50, 0x49, - 0x3e, 0x4d, 0x4f, 0x4f, 0x51, 0x4c, 0x48, 0x43, 0x4a, 0x5a, 0x4d, 0x4b, - 0x4e, 0x58, 0x54, 0x49, 0x51, 0x42, 0x49, 0x4f, 0x46, 0x45, 0x52, 0x3d, - 0x4b, 0x4b, 0x43, 0x54, 0x47, 0x47, 0x4c, 0x42, 0x4b, 0x49, 0x45, 0x46, - 0x46, 0x4a, 0x51, 0x47, 0x47, 0x4f, 0x48, 0x4a, 0x3f, 
0x4c, 0x4b, 0x57, - 0x4a, 0x3f, 0x52, 0x4a, 0x56, 0x52, 0x4b, 0x54, 0x4c, 0x3e, 0x3f, 0x4f, - 0x4b, 0x50, 0x4c, 0x53, 0x4a, 0x49, 0x46, 0x4e, 0x50, 0x48, 0x4f, 0x4b, - 0x4a, 0x4e, 0x3e, 0x49, 0x45, 0x42, 0x42, 0x41, 0x47, 0x4b, 0x4f, 0x42, - 0x49, 0x4c, 0x55, 0x4c, 0x4e, 0x42, 0x47, 0x42, 0x4b, 0x48, 0x46, 0x41, - 0x46, 0x4e, 0x4d, 0x3f, 0x4f, 0x46, 0x4f, 0x4b, 0x4b, 0x4d, 0x50, 0x3e, - 0x42, 0x43, 0x44, 0x4a, 0x49, 0x40, 0x4e, 0x43, 0x3e, 0x52, 0x3e, 0x44, - 0x49, 0x43, 0x4d, 0x44, 0x62, 0x51, 0x42, 0x53, 0x51, 0x40, 0x4c, 0x64, - 0x4f, 0x63, 0x4e, 0x5c, 0x5b, 0x5c, 0x48, 0x4d, 0x4a, 0x57, 0x4f, 0x42, - 0x65, 0xfe, 0x5c, 0x4e, 0x47, 0x43, 0x4a, 0x58, 0x4e, 0x5e, 0x48, 0x4c, - 0x51, 0x5e, 0x60, 0x56, 0x2f, 0x62, 0x54, 0x58, 0x51, 0x52, 0x55, 0x51, - 0x36, 0x4b, 0x46, 0x51, 0x53, 0x5f, 0x46, 0x4c, 0x37, 0x4d, 0x4a, 0x45, - 0x4b, 0x3f, 0x41, 0x42, 0x3f, 0x53, 0x4a, 0x48, 0x49, 0x4a, 0x4a, 0x45, - 0x52, 0x3f, 0x52, 0x52, 0x45, 0x4d, 0x4f, 0x45, 0x46, 0x4a, 0x51, 0x48, - 0x56, 0x47, 0x50, 0x3e, 0x46, 0x49, 0x4c, 0x51, 0x49, 0x54, 0x45, 0x4f, - 0x4b, 0x4b, 0x49, 0x46, 0x4b, 0x4d, 0x49, 0x5c, 0x4d, 0x43, 0x47, 0x49, - 0x48, 0x52, 0x46, 0x50, 0x51, 0x37, 0x50, 0x52, 0x4c, 0x4d, 0x4f, 0x51, - 0x4f, 0x42, 0x50, 0x47, 0x48, 0x4e, 0x4d, 0x4c, 0x48, 0x48, 0x4a, 0x51, - 0x49, 0x42, 0x50, 0x4f, 0x43, 0x4e, 0x47, 0x4b, 0x47, 0x4a, 0x44, 0x44, - 0x4c, 0x51, 0x49, 0x44, 0x45, 0x45, 0x45, 0x48, 0x3f, 0x4a, 0x43, 0x49, - 0x46, 0x49, 0x4c, 0x4d, 0x45, 0x50, 0x44, 0x45, 0x44, 0x55, 0x4a, 0x45, - 0x48, 0x47, 0x4c, 0x43, 0x3f, 0x48, 0x42, 0x43, 0x43, 0x43, 0x48, 0x46, - 0x5c, 0x51, 0x47, 0x51, 0x48, 0x40, 0x54, 0x66, 0x4e, 0x67, 0x4d, 0x5a, - 0x60, 0x57, 0x47, 0x4d, 0x4d, 0x58, 0x53, 0x46, 0x66, 0x7e, 0x56, 0x48, - 0x44, 0x4f, 0x49, 0x5c, 0x4a, 0x63, 0x50, 0x4c, 0x49, 0x56, 0x61, 0x50, - 0x2c, 0x68, 0x4d, 0x51, 0x46, 0x4e, 0x5b, 0x51, 0x2e, 0x53, 0x54, 0x50, - 0x46, 0x58, 0x44, 0x4f, 0x37, 0x48, 0x55, 0x50, 0x49, 0x49, 0x4e, 0x46, - 0x43, 0x56, 0x52, 0x4e, 0x50, 0x4b, 0x50, 0x4c, 0x49, 
0x40, 0x4d, 0x4f, - 0x50, 0x41, 0x44, 0x39, 0x4b, 0x4d, 0x4b, 0x41, 0x51, 0x4d, 0x4c, 0x41, - 0x3f, 0x52, 0x4e, 0x4b, 0x49, 0x53, 0x45, 0x43, 0x4d, 0x4f, 0x44, 0x4d, - 0x4b, 0x53, 0x50, 0x4e, 0x45, 0x3f, 0x4e, 0x51, 0x50, 0x55, 0x4f, 0x51, - 0x4d, 0x3d, 0x58, 0x3f, 0x46, 0x50, 0x50, 0x50, 0x56, 0x42, 0x49, 0x49, - 0x50, 0x4f, 0x42, 0x4b, 0x4c, 0x45, 0x52, 0x41, 0x46, 0x43, 0x4c, 0x4a, - 0x4c, 0x51, 0x4d, 0x4d, 0x4a, 0x49, 0x54, 0x49, 0x58, 0x53, 0x49, 0x45, - 0x47, 0x4c, 0x4c, 0x44, 0x4e, 0x51, 0x4c, 0x4c, 0x47, 0x48, 0x4c, 0x4e, - 0x49, 0x54, 0x4c, 0x51, 0x49, 0x48, 0x47, 0x45, 0x42, 0x49, 0x42, 0x51, - 0x4e, 0x3f, 0x49, 0x41, 0x50, 0x3e, 0x4d, 0x50, 0x5c, 0x51, 0x4d, 0x56, - 0x47, 0x48, 0x58, 0x65, 0x51, 0x6b, 0x56, 0x5b, 0x56, 0x55, 0x46, 0x49, - 0x4b, 0x58, 0x59, 0x4a, 0x68, 0x79, 0x53, 0x46, 0x45, 0x4b, 0x53, 0x5d, - 0x4b, 0x6f, 0x4e, 0x4f, 0x4c, 0x53, 0x5b, 0x52, 0x30, 0x63, 0x46, 0x57, - 0x46, 0x50, 0x4b, 0x48, 0x2e, 0x4c, 0x46, 0x48, 0x44, 0x51, 0x46, 0x4a, - 0x35, 0x55, 0x43, 0x4c, 0x43, 0x4d, 0x4e, 0x3e, 0x47, 0x56, 0x50, 0x4d, - 0x44, 0x59, 0x4c, 0x51, 0x46, 0x42, 0x4e, 0x43, 0x4c, 0x44, 0x42, 0x3a, - 0x40, 0x48, 0x46, 0x44, 0x45, 0x4a, 0x46, 0x3a, 0x53, 0x4c, 0x4d, 0x4c, - 0x4a, 0x4f, 0x53, 0x40, 0x4b, 0x48, 0x54, 0x4b, 0x44, 0x59, 0x41, 0x50, - 0x4e, 0x50, 0x55, 0x4d, 0x55, 0x41, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x50, - 0x52, 0x4c, 0x50, 0x4d, 0x47, 0x42, 0x4f, 0x4b, 0x47, 0x43, 0x41, 0x4a, - 0x55, 0x3e, 0x50, 0x4b, 0x41, 0x49, 0x47, 0x49, 0x53, 0x4d, 0x48, 0x4b, - 0x43, 0x43, 0x51, 0x44, 0x4d, 0x4c, 0x44, 0x50, 0x4d, 0x42, 0x49, 0x4e, - 0x50, 0x50, 0x4c, 0x49, 0x49, 0x51, 0x46, 0x43, 0x4a, 0x4e, 0x53, 0x47, - 0x43, 0x46, 0x40, 0x49, 0x47, 0x44, 0x44, 0x4d, 0x4b, 0x4b, 0x51, 0x4b, - 0x45, 0x49, 0x47, 0x43, 0x56, 0x49, 0x4c, 0x54, 0x50, 0x3c, 0x4c, 0x5e, - 0x51, 0x67, 0x4f, 0x57, 0x57, 0x53, 0x3e, 0x4e, 0x4e, 0x5e, 0x4b, 0x48, - 0x5a, 0x78, 0x55, 0x4a, 0x3f, 0x4b, 0x4c, 0x5b, 0x53, 0x64, 0x4d, 0x53, - 0x49, 0x57, 0x57, 0x58, 0x37, 0x62, 0x4f, 0x56, 0x44, 
0x4e, 0x58, 0x4a, - 0x30, 0x4f, 0x40, 0x4e, 0x47, 0x58, 0x52, 0x50, 0x35, 0x4d, 0x49, 0x52, - 0x4e, 0x42, 0x46, 0x47, 0x44, 0x57, 0x54, 0x43, 0x4e, 0x56, 0x43, 0x49, - 0x44, 0x40, 0x44, 0x41, 0x50, 0x49, 0x4b, 0x44, 0x4d, 0x52, 0x49, 0x43, - 0x52, 0x54, 0x49, 0x3f, 0x49, 0x42, 0x49, 0x4a, 0x43, 0x3e, 0x50, 0x40, - 0x46, 0x4b, 0x50, 0x4b, 0x53, 0x4b, 0x47, 0x52, 0x51, 0x4b, 0x47, 0x3f, - 0x46, 0x4b, 0x4c, 0x57, 0x49, 0x47, 0x54, 0x49, 0x50, 0x50, 0x4d, 0x4a, - 0x42, 0x4e, 0x51, 0x4c, 0x47, 0x47, 0x42, 0x43, 0x54, 0x43, 0x46, 0x47, - 0x4d, 0x43, 0x54, 0x47, 0x43, 0x58, 0x48, 0x45, 0x4b, 0x46, 0x48, 0x3d, - 0x47, 0x3f, 0x44, 0x4f, 0x4e, 0x46, 0x41, 0x40, 0x4d, 0x4d, 0x4d, 0x52, - 0x54, 0x47, 0x4f, 0x51, 0x4f, 0x45, 0x45, 0x48, 0x4b, 0x4d, 0x44, 0x52, - 0x51, 0x4b, 0x48, 0x4f, 0x49, 0x49, 0x46, 0x50, 0x54, 0x42, 0x44, 0x51, - 0x58, 0x4e, 0x43, 0x58, 0x55, 0x40, 0x53, 0x5a, 0x51, 0x61, 0x51, 0x60, - 0x53, 0x57, 0x45, 0x4f, 0x45, 0x5e, 0x51, 0x42, 0x61, 0x7a, 0x55, 0x47, - 0x41, 0x4b, 0x4a, 0x5b, 0x4c, 0x65, 0x4f, 0x55, 0x46, 0x54, 0x65, 0x59, - 0x36, 0x61, 0x54, 0x55, 0x48, 0x57, 0x52, 0x4e, 0x24, 0x4b, 0x49, 0x4d, - 0x43, 0x57, 0x44, 0x51, 0x3b, 0x4f, 0x45, 0x40, 0x47, 0x4a, 0x43, 0x47, - 0x46, 0x58, 0x50, 0x54, 0x4d, 0x50, 0x44, 0x42, 0x4a, 0x46, 0x4b, 0x4d, - 0x4f, 0x4f, 0x4d, 0x40, 0x48, 0x4a, 0x53, 0x48, 0x49, 0x48, 0x4d, 0x39, - 0x47, 0x4e, 0x44, 0x4c, 0x4b, 0x49, 0x44, 0x42, 0x4a, 0x45, 0x46, 0x46, - 0x53, 0x4d, 0x49, 0x4f, 0x4e, 0x48, 0x50, 0x4a, 0x4c, 0x46, 0x56, 0x4b, - 0x4b, 0x57, 0x4c, 0x49, 0x4a, 0x4a, 0x43, 0x4e, 0x56, 0x45, 0x50, 0x4c, - 0x47, 0x55, 0x48, 0x46, 0x4e, 0x46, 0x45, 0x3f, 0x4a, 0x4c, 0x4c, 0x47, - 0x4a, 0x51, 0x4e, 0x50, 0x40, 0x52, 0x45, 0x45, 0x4b, 0x46, 0x4f, 0x44, - 0x51, 0x4a, 0x4e, 0x4d, 0x4c, 0x46, 0x42, 0x47, 0x4a, 0x4e, 0x46, 0x42, - 0x4b, 0x4f, 0x4b, 0x4e, 0x4e, 0x46, 0x42, 0x50, 0x53, 0x51, 0x4f, 0x54, - 0x45, 0x4f, 0x45, 0x42, 0x4c, 0x45, 0x40, 0x48, 0x59, 0x49, 0x49, 0x53, - 0x4c, 0x43, 0x4b, 0x57, 0x54, 0x64, 0x4e, 0x5f, 0x5c, 
0x59, 0x4b, 0x56, - 0x49, 0x5d, 0x4f, 0x4b, 0x62, 0x73, 0x54, 0x45, 0x49, 0x50, 0x48, 0x5a, - 0x50, 0x6d, 0x4a, 0x4e, 0x48, 0x55, 0x5d, 0x57, 0x38, 0x68, 0x52, 0x5a, - 0x46, 0x56, 0x4c, 0x5a, 0x2e, 0x55, 0x49, 0x4f, 0x4a, 0x57, 0x4f, 0x54, - 0x41, 0x53, 0x46, 0x43, 0x45, 0x47, 0x53, 0x4a, 0x42, 0x4f, 0x4d, 0x48, - 0x4c, 0x49, 0x47, 0x48, 0x45, 0x49, 0x48, 0x53, 0x48, 0x52, 0x4a, 0x44, - 0x4c, 0x49, 0x52, 0x4b, 0x47, 0x51, 0x42, 0x47, 0x49, 0x51, 0x3f, 0x45, - 0x47, 0x4e, 0x53, 0x33, 0x55, 0x51, 0x55, 0x48, 0x4b, 0x51, 0x56, 0x47, - 0x43, 0x55, 0x47, 0x42, 0x47, 0x4f, 0x47, 0x51, 0x46, 0x55, 0x4a, 0x4b, - 0x50, 0x52, 0x4f, 0x43, 0x4b, 0x53, 0x4d, 0x3f, 0x4e, 0x56, 0x50, 0x49, - 0x4d, 0x47, 0x51, 0x49, 0x4a, 0x52, 0x44, 0x43, 0x4d, 0x4e, 0x41, 0x51, - 0x4c, 0x4d, 0x47, 0x48, 0x4f, 0x40, 0x50, 0x46, 0x43, 0x4d, 0x4e, 0x50, - 0x43, 0x47, 0x4e, 0x46, 0x4f, 0x4b, 0x51, 0x4b, 0x4a, 0x57, 0x42, 0x51, - 0x4c, 0x54, 0x52, 0x42, 0x4c, 0x42, 0x47, 0x54, 0x4a, 0x4a, 0x47, 0x4a, - 0x3f, 0x46, 0x4e, 0x4c, 0x53, 0x50, 0x47, 0x53, 0x49, 0x44, 0x52, 0x5a, - 0x4b, 0x65, 0x50, 0x5b, 0x57, 0x59, 0x4a, 0x48, 0x48, 0x5f, 0x55, 0x48, - 0x5c, 0x78, 0x55, 0x48, 0x4a, 0x4b, 0x49, 0x4c, 0x46, 0x6b, 0x54, 0x57, - 0x55, 0x4b, 0x59, 0x52, 0x38, 0x5b, 0x57, 0x56, 0x4b, 0x4f, 0x48, 0x4e, - 0x34, 0x5a, 0x4e, 0x4f, 0x43, 0x4e, 0x4b, 0x4e, 0x36, 0x4d, 0x52, 0x48, - 0x4d, 0x4c, 0x4c, 0x49, 0x51, 0x54, 0x45, 0x54, 0x4a, 0x4e, 0x52, 0x41, - 0x4c, 0x45, 0x4a, 0x53, 0x55, 0x4b, 0x50, 0x47, 0x4e, 0x4d, 0x43, 0x51, - 0x4e, 0x4a, 0x51, 0x46, 0x4e, 0x4d, 0x48, 0x3f, 0x43, 0x52, 0x56, 0x38, - 0x52, 0x46, 0x43, 0x49, 0x40, 0x49, 0x53, 0x41, 0x47, 0x41, 0x41, 0x42, - 0x4f, 0x4b, 0x46, 0x4b, 0x4a, 0x57, 0x4a, 0x45, 0x4b, 0x46, 0x47, 0x3c, - 0x43, 0x46, 0x4f, 0x50, 0x4c, 0x53, 0x4f, 0x41, 0x4a, 0x4a, 0x40, 0x4a, - 0x3e, 0x4e, 0x4d, 0x41, 0x4a, 0x42, 0x49, 0x4c, 0x51, 0x46, 0x4f, 0x43, - 0x4b, 0x41, 0x50, 0x48, 0x4a, 0x40, 0x52, 0x45, 0x40, 0x40, 0x46, 0x48, - 0x48, 0x52, 0x52, 0x41, 0x43, 0x49, 0x49, 0x4c, 0x44, 
0x48, 0x50, 0x4a, - 0x47, 0x48, 0x4c, 0x42, 0x49, 0x48, 0x52, 0x56, 0x4b, 0x41, 0x4e, 0x47, - 0x52, 0x56, 0x4e, 0x56, 0x4b, 0x38, 0x50, 0x55, 0x5a, 0x63, 0x51, 0x5a, - 0x54, 0x52, 0x44, 0x45, 0x47, 0x5e, 0x4c, 0x4a, 0x5e, 0x71, 0x56, 0x44, - 0x4c, 0x4b, 0x4c, 0x4e, 0x49, 0x69, 0x50, 0x53, 0x4d, 0x5c, 0x59, 0x50, - 0x36, 0x5d, 0x46, 0x5b, 0x51, 0x55, 0x55, 0x51, 0x36, 0x5a, 0x53, 0x56, - 0x54, 0x4a, 0x55, 0x53, 0x3c, 0x52, 0x4a, 0x45, 0x4c, 0x56, 0x49, 0x46, - 0x4f, 0x5b, 0x43, 0x4b, 0x49, 0x4c, 0x4b, 0x41, 0x44, 0x4b, 0x47, 0x4b, - 0x4b, 0x54, 0x4a, 0x4c, 0x49, 0x44, 0x46, 0x46, 0x48, 0x49, 0x47, 0x4a, - 0x40, 0x4e, 0x47, 0x53, 0x4a, 0x47, 0x4a, 0x3b, 0x48, 0x4b, 0x50, 0x51, - 0x50, 0x44, 0x4d, 0x49, 0x42, 0x4b, 0x43, 0x48, 0x4a, 0x43, 0x4d, 0x4d, - 0x49, 0x4d, 0x43, 0x4f, 0x50, 0x49, 0x47, 0x48, 0x48, 0x4f, 0x49, 0x41, - 0x4c, 0x46, 0x47, 0x3e, 0x51, 0x4d, 0x4e, 0x42, 0x3d, 0x53, 0x4d, 0x3b, - 0x53, 0x52, 0x4c, 0x4c, 0x43, 0x46, 0x43, 0x3d, 0x53, 0x48, 0x43, 0x4e, - 0x45, 0x52, 0x4d, 0x4a, 0x44, 0x49, 0x47, 0x4c, 0x4e, 0x4c, 0x4a, 0x4e, - 0x41, 0x48, 0x4b, 0x44, 0x4d, 0x4a, 0x4d, 0x44, 0x4a, 0x45, 0x4f, 0x52, - 0x45, 0x3f, 0x4b, 0x48, 0x43, 0x41, 0x3d, 0x53, 0x53, 0x50, 0x4a, 0x56, - 0x4d, 0x3e, 0x55, 0x4e, 0x56, 0x5e, 0x52, 0x52, 0x54, 0x50, 0x42, 0x4a, - 0x4d, 0x5f, 0x4f, 0x49, 0x5d, 0x6f, 0x55, 0x4a, 0x47, 0x49, 0x4e, 0x4a, - 0x43, 0x6e, 0x4e, 0x4f, 0x52, 0x59, 0x62, 0x4b, 0x3e, 0x5c, 0x4c, 0x4e, - 0x45, 0x52, 0x43, 0x4d, 0x3c, 0x58, 0x52, 0x49, 0x48, 0x55, 0x53, 0x4e, - 0x3d, 0x4e, 0x4c, 0x4b, 0x4b, 0x50, 0x4a, 0x47, 0x45, 0x62, 0x50, 0x49, - 0x48, 0x4b, 0x55, 0x45, 0x46, 0x51, 0x41, 0x55, 0x54, 0x55, 0x50, 0x47, - 0x46, 0x4d, 0x46, 0x4b, 0x41, 0x49, 0x4c, 0x40, 0x45, 0x4f, 0x52, 0x54, - 0x45, 0x4d, 0x53, 0x3a, 0x4c, 0x55, 0x4e, 0x48, 0x44, 0x45, 0x56, 0x3c, - 0x48, 0x46, 0x4b, 0x51, 0x53, 0x43, 0x41, 0x49, 0x4c, 0x52, 0x48, 0x42, - 0x48, 0x3f, 0x4c, 0x38, 0x46, 0x50, 0x4a, 0x44, 0x50, 0x54, 0x4e, 0x38, - 0x48, 0x42, 0x43, 0x4a, 0x4c, 0x44, 0x47, 0x42, 0x42, 
0x46, 0x4a, 0x50, - 0x47, 0x4b, 0x43, 0x40, 0x44, 0x46, 0x46, 0x4d, 0x50, 0x4a, 0x4e, 0x51, - 0x44, 0x40, 0x50, 0x43, 0x52, 0x4d, 0x42, 0x4c, 0x50, 0x41, 0x4a, 0x4e, - 0x45, 0x49, 0x4d, 0x40, 0x46, 0x51, 0x43, 0x4b, 0x48, 0x47, 0x42, 0x55, - 0x4a, 0x41, 0x4f, 0x49, 0x4f, 0x4e, 0x47, 0x4c, 0x4a, 0x48, 0x50, 0x4e, - 0x50, 0x57, 0x4e, 0x56, 0x56, 0x4e, 0x44, 0x48, 0x4a, 0x5b, 0x55, 0x49, - 0x59, 0x67, 0x54, 0x46, 0x4f, 0x41, 0x4d, 0x4e, 0x4a, 0x63, 0x4d, 0x44, - 0x53, 0x5b, 0x59, 0x4f, 0x43, 0x55, 0x56, 0x4e, 0x55, 0x4c, 0x4b, 0x54, - 0x3c, 0x56, 0x4d, 0x50, 0x4f, 0x4a, 0x5a, 0x47, 0x48, 0x56, 0x4f, 0x4f, - 0x50, 0x51, 0x48, 0x4e, 0x4d, 0x50, 0x4e, 0x45, 0x4b, 0x48, 0x4e, 0x44, - 0x46, 0x4d, 0x43, 0x46, 0x41, 0x59, 0x53, 0x4b, 0x4a, 0x3e, 0x51, 0x47, - 0x43, 0x48, 0x52, 0x3f, 0x43, 0x50, 0x4b, 0x4f, 0x41, 0x48, 0x43, 0x2e, - 0x4d, 0x4e, 0x4c, 0x45, 0x45, 0x46, 0x4b, 0x43, 0x46, 0x49, 0x46, 0x4d, - 0x47, 0x4e, 0x4d, 0x3c, 0x47, 0x4a, 0x52, 0x4e, 0x41, 0x50, 0x43, 0x3a, - 0x50, 0x47, 0x4a, 0x45, 0x52, 0x4a, 0x4c, 0x3f, 0x42, 0x3d, 0x49, 0x48, - 0x48, 0x4c, 0x42, 0x3a, 0x40, 0x47, 0x46, 0x4e, 0x44, 0x52, 0x46, 0x44, - 0x4a, 0x44, 0x43, 0x49, 0x42, 0x45, 0x3f, 0x50, 0x4c, 0x44, 0x48, 0x43, - 0x47, 0x4a, 0x48, 0x48, 0x3e, 0x45, 0x43, 0x48, 0x4a, 0x48, 0x53, 0x4b, - 0x50, 0x49, 0x43, 0x4d, 0x53, 0x4f, 0x4b, 0x4b, 0x40, 0x42, 0x50, 0x4d, - 0x53, 0x4e, 0x44, 0x4d, 0x45, 0x3d, 0x51, 0x51, 0x4f, 0x59, 0x4b, 0x51, - 0x4a, 0x4e, 0x42, 0x40, 0x49, 0x5b, 0x4b, 0x43, 0x53, 0x60, 0x47, 0x49, - 0x4a, 0x44, 0x44, 0x48, 0x4b, 0x60, 0x51, 0x3f, 0x4b, 0x5b, 0x4f, 0x4a, - 0x4a, 0x50, 0x49, 0x46, 0x55, 0x50, 0x4b, 0x4c, 0x40, 0x4e, 0x51, 0x4f, - 0x4b, 0x51, 0x54, 0x50, 0x48, 0x4e, 0x4a, 0x4f, 0x4d, 0x4e, 0x54, 0x4d, - 0x41, 0x50, 0x4e, 0x47, 0x47, 0x47, 0x54, 0x3b, 0x51, 0x54, 0x50, 0x49, - 0x48, 0x4c, 0x4e, 0x47, 0x3f, 0x3c, 0x4c, 0x43, 0x45, 0x42, 0x45, 0x37, - 0x41, 0x52, 0x49, 0x47, 0x4e, 0x4a, 0x4b, 0x37, 0x48, 0x4d, 0x4e, 0x4a, - 0x42, 0x56, 0x3d, 0x35, 0x48, 0x42, 0x4b, 0x4a, 0x44, 
0x52, 0x40, 0x48, - 0x4f, 0x49, 0x4f, 0x4c, 0x4d, 0x43, 0x49, 0x38, 0x4b, 0x42, 0x48, 0x42, - 0x45, 0x45, 0x54, 0x3a, 0x47, 0x47, 0x52, 0x45, 0x4a, 0x48, 0x47, 0x39, - 0x4d, 0x45, 0x54, 0x4b, 0x4e, 0x4f, 0x4e, 0x38, 0x4a, 0x4b, 0x48, 0x45, - 0x4e, 0x43, 0x4e, 0x4e, 0x46, 0x4e, 0x4e, 0x50, 0x46, 0x4c, 0x42, 0x45, - 0x4b, 0x46, 0x47, 0x4d, 0x49, 0x3f, 0x4f, 0x50, 0x46, 0x4a, 0x47, 0x4e, - 0x4a, 0x3e, 0x50, 0x46, 0x47, 0x40, 0x4f, 0x47, 0x51, 0x4b, 0x43, 0x46, - 0x4a, 0x42, 0x55, 0x4d, 0x46, 0x63, 0x49, 0x4e, 0x4f, 0x4f, 0x42, 0x45, - 0x50, 0x57, 0x49, 0x3e, 0x57, 0x63, 0x45, 0x4a, 0x49, 0x50, 0x41, 0x4a, - 0x48, 0x64, 0x4f, 0x42, 0x47, 0x58, 0x4b, 0x45, 0x43, 0x57, 0x49, 0x58, - 0x51, 0x51, 0x47, 0x43, 0x51, 0x4b, 0x4a, 0x45, 0x50, 0x54, 0x4d, 0x4d, - 0x3e, 0x4a, 0x50, 0x40, 0x51, 0x4f, 0x52, 0x48, 0x53, 0x49, 0x44, 0x4b, - 0x51, 0x4b, 0x50, 0x42, 0x4d, 0x49, 0x4a, 0x46, 0x44, 0x50, 0x47, 0x3f, - 0x48, 0x47, 0x41, 0x4a, 0x42, 0x52, 0x4a, 0x33, 0x50, 0x50, 0x54, 0x3f, - 0x44, 0x4e, 0x51, 0x3c, 0x4e, 0x51, 0x48, 0x4b, 0x47, 0x49, 0x3f, 0x3d, - 0x4e, 0x46, 0x4a, 0x41, 0x40, 0x50, 0x49, 0x40, 0x4a, 0x4b, 0x45, 0x50, - 0x4e, 0x4d, 0x4b, 0x39, 0x4e, 0x4b, 0x48, 0x3c, 0x47, 0x44, 0x4c, 0x42, - 0x45, 0x50, 0x3e, 0x54, 0x4d, 0x49, 0x48, 0x3c, 0x45, 0x42, 0x55, 0x4a, - 0x41, 0x4f, 0x40, 0x3f, 0x47, 0x46, 0x46, 0x44, 0x4f, 0x47, 0x46, 0x44, - 0x41, 0x40, 0x44, 0x48, 0x3e, 0x3c, 0x46, 0x3e, 0x4a, 0x45, 0x4c, 0x52, - 0x47, 0x42, 0x47, 0x3f, 0x47, 0x4e, 0x4b, 0x53, 0x4a, 0x3d, 0x4d, 0x47, - 0x4f, 0x3d, 0x4e, 0x43, 0x4f, 0x46, 0x43, 0x43, 0x46, 0x41, 0x4f, 0x42, - 0x46, 0x57, 0x4d, 0x51, 0x49, 0x51, 0x4c, 0x44, 0x51, 0x4f, 0x46, 0x44, - 0x54, 0x5d, 0x4f, 0x40, 0x59, 0x46, 0x53, 0x46, 0x48, 0x54, 0x43, 0x45, - 0x4d, 0x51, 0x4f, 0x44, 0x44, 0x53, 0x49, 0x4e, 0x48, 0x46, 0x44, 0x4a, - 0x4a, 0x42, 0x4c, 0x46, 0x54, 0x4f, 0x52, 0x47, 0x46, 0x44, 0x4c, 0x4d, - 0x4c, 0x47, 0x4d, 0x40, 0x55, 0x58, 0x46, 0x46, 0x3f, 0x3e, 0x47, 0x36, - 0x3f, 0x4d, 0x4b, 0x4d, 0x4f, 0x4f, 0x48, 0x34, 0x4d, 
0x46, 0x46, 0x50, - 0x50, 0x4b, 0x47, 0x45, 0x4e, 0x49, 0x50, 0x4f, 0x4a, 0x48, 0x4f, 0x39, - 0x53, 0x4c, 0x4b, 0x56, 0x45, 0x4f, 0x55, 0x3a, 0x40, 0x53, 0x43, 0x4b, - 0x47, 0x3d, 0x4c, 0x34, 0x4b, 0x4e, 0x4a, 0x4b, 0x4d, 0x49, 0x4e, 0x40, - 0x4d, 0x48, 0x40, 0x4a, 0x4a, 0x4b, 0x4a, 0x42, 0x4c, 0x52, 0x43, 0x42, - 0x44, 0x3f, 0x4e, 0x42, 0x44, 0x45, 0x40, 0x3d, 0x4b, 0x45, 0x4a, 0x43, - 0x4b, 0x4b, 0x4e, 0x46, 0x55, 0x43, 0x44, 0x3f, 0x44, 0x43, 0x4b, 0x4b, - 0x45, 0x51, 0x48, 0x49, 0x3d, 0x44, 0x4a, 0x4a, 0x50, 0x50, 0x47, 0x44, - 0x4f, 0x3e, 0x3f, 0x43, 0x4c, 0x46, 0x4a, 0x4e, 0x4c, 0x52, 0x48, 0x4e, - 0x48, 0x46, 0x45, 0x48, 0x41, 0x4f, 0x51, 0x48, 0x40, 0x4d, 0x4a, 0x4b, - 0x4c, 0x51, 0x49, 0x50, 0x4e, 0x4b, 0x4a, 0x42, 0x49, 0x54, 0x4e, 0x43, - 0x52, 0x47, 0x4a, 0x41, 0x42, 0x51, 0x48, 0x4a, 0x46, 0x45, 0x4a, 0x43, - 0x4e, 0x4f, 0x41, 0x49, 0x4b, 0x42, 0x40, 0x4a, 0x50, 0x41, 0x42, 0x3f, - 0x49, 0x4a, 0x40, 0x3e, 0x3f, 0x42, 0x4d, 0x51, 0x4e, 0x4e, 0x47, 0x41, - 0x4e, 0x4e, 0x49, 0x4b, 0x41, 0x45, 0x51, 0x40, 0x45, 0x4c, 0x3f, 0x42, - 0x4c, 0x45, 0x4d, 0x39, 0x46, 0x52, 0x4a, 0x4e, 0x4c, 0x49, 0x4e, 0x43, - 0x43, 0x4c, 0x48, 0x46, 0x48, 0x49, 0x50, 0x3a, 0x3f, 0x49, 0x42, 0x4f, - 0x42, 0x4d, 0x4e, 0x3f, 0x51, 0x4b, 0x4e, 0x4b, 0x51, 0x44, 0x43, 0x4a, - 0x4a, 0x4c, 0x50, 0x48, 0x45, 0x47, 0x4d, 0x41, 0x47, 0x45, 0x51, 0x41, - 0x42, 0x48, 0x4c, 0x39, 0x51, 0x45, 0x46, 0x53, 0x4b, 0x50, 0x46, 0x45, - 0x4b, 0x4d, 0x42, 0x4b, 0x3f, 0x45, 0x4b, 0x4e, 0x50, 0x50, 0x47, 0x4a, - 0x45, 0x40, 0x4b, 0x43, 0x3f, 0x4a, 0x41, 0x42, 0x51, 0x41, 0x4d, 0x42, - 0x53, 0x48, 0x48, 0x49, 0x4b, 0x40, 0x42, 0x3d, 0x4f, 0x53, 0x49, 0x46, - 0x46, 0x43, 0x42, 0x44, 0x46, 0x48, 0x3f, 0x46, 0x31, 0x43, 0x4d, 0x4b, - 0x48, 0x4d, 0x4c, 0x43, 0x45, 0x53, 0x50, 0x40, 0x4a, 0x48, 0x45, 0x3b, - 0x4f, 0x4d, 0x53, 0x4c, 0x44, 0x54, 0x50, 0x66, 0x3f, 0x45, 0x4c, 0x4c, - 0x4a, 0x49, 0x49, 0x4a, 0x40, 0x52, 0x3e, 0x4c, 0x49, 0x40, 0x44, 0x49, - 0x48, 0x3f, 0x45, 0x5b, 0x49, 0x4b, 0x4c, 0x44, 0x50, 
0x4e, 0x4a, 0x4a, - 0x49, 0x4e, 0x4f, 0x47, 0x46, 0x4b, 0x44, 0x3b, 0x4e, 0x4b, 0x48, 0x46, - 0x45, 0x45, 0x3d, 0x35, 0x4c, 0x49, 0x54, 0x42, 0x51, 0x46, 0x49, 0x2d, - 0x43, 0x4a, 0x53, 0x49, 0x49, 0x42, 0x4f, 0x40, 0x4e, 0x50, 0x54, 0x51, - 0x4b, 0x45, 0x48, 0x35, 0x4d, 0x41, 0x51, 0x40, 0x41, 0x49, 0x4a, 0x3b, - 0x45, 0x50, 0x48, 0x51, 0x51, 0x4d, 0x4c, 0x36, 0x47, 0x4a, 0x44, 0x45, - 0x4d, 0x47, 0x43, 0x3a, 0x48, 0x40, 0x42, 0x4f, 0x4f, 0x4f, 0x4f, 0x43, - 0x4a, 0x41, 0x4b, 0x53, 0x43, 0x46, 0x4f, 0x39, 0x46, 0x4a, 0x4d, 0x53, - 0x41, 0x44, 0x4e, 0x44, 0x3f, 0x47, 0x4c, 0x4d, 0x4d, 0x43, 0x45, 0x3d, - 0x43, 0x4b, 0x3e, 0x48, 0x42, 0x4c, 0x47, 0x42, 0x42, 0x50, 0x49, 0x4b, - 0x43, 0x4e, 0x44, 0x44, 0x4c, 0x3d, 0x4c, 0x47, 0x4e, 0x42, 0x4b, 0x44, - 0x4b, 0x44, 0x3f, 0x49, 0x33, 0x46, 0x4a, 0x4a, 0x42, 0x57, 0x5e, 0x4a, - 0x46, 0x4f, 0x55, 0x3c, 0x4a, 0x4b, 0x4c, 0x43, 0x51, 0x59, 0x64, 0x51, - 0x45, 0x60, 0x4b, 0x65, 0x46, 0x4a, 0x4e, 0x49, 0x41, 0x4b, 0x50, 0x5c, - 0x48, 0x4b, 0x3e, 0x52, 0x4f, 0x2f, 0x4e, 0x4a, 0x45, 0x53, 0x48, 0x59, - 0x4c, 0x4e, 0x4a, 0x4d, 0x49, 0x40, 0x52, 0x44, 0x49, 0x46, 0x4e, 0x46, - 0x42, 0x4b, 0x4a, 0x4b, 0x4b, 0x4b, 0x4f, 0x52, 0x46, 0x50, 0x4d, 0x3d, - 0x46, 0x4b, 0x4b, 0x40, 0x4d, 0x3f, 0x43, 0x33, 0x4e, 0x53, 0x4b, 0x4a, - 0x45, 0x48, 0x4c, 0x2e, 0x48, 0x4f, 0x49, 0x42, 0x54, 0x4f, 0x4b, 0x2b, - 0x55, 0x4e, 0x43, 0x4d, 0x4d, 0x47, 0x42, 0x3e, 0x48, 0x48, 0x4d, 0x54, - 0x52, 0x4f, 0x43, 0x37, 0x4b, 0x42, 0x4b, 0x4e, 0x49, 0x49, 0x4b, 0x2e, - 0x45, 0x4e, 0x48, 0x4e, 0x44, 0x49, 0x48, 0x30, 0x4c, 0x4b, 0x3f, 0x42, - 0x4f, 0x4f, 0x4e, 0x38, 0x4f, 0x42, 0x54, 0x49, 0x41, 0x42, 0x45, 0x3a, - 0x47, 0x43, 0x43, 0x4b, 0x49, 0x40, 0x4d, 0x38, 0x52, 0x4c, 0x3d, 0x4d, - 0x43, 0x54, 0x4e, 0x41, 0x4a, 0x47, 0x44, 0x51, 0x47, 0x48, 0x41, 0x47, - 0x4d, 0x41, 0x46, 0x4c, 0x4d, 0x46, 0x51, 0x4a, 0x49, 0x46, 0x4a, 0x42, - 0x3a, 0x43, 0x4a, 0x4b, 0x43, 0x4c, 0x68, 0x44, 0x4b, 0x52, 0x50, 0x37, - 0x4d, 0x4c, 0x57, 0x4c, 0x68, 0x62, 0x64, 0x4a, 0x3e, 
0x64, 0x4b, 0x66, - 0x48, 0x4d, 0x54, 0x57, 0x4b, 0x52, 0x49, 0x5c, 0x4d, 0x55, 0x51, 0x57, - 0x4c, 0x3a, 0x48, 0x43, 0x3b, 0x43, 0x52, 0x5d, 0x45, 0x4e, 0x51, 0x4d, - 0x4a, 0x55, 0x4e, 0x4c, 0x44, 0x51, 0x4c, 0x4f, 0x41, 0x4f, 0x4a, 0x43, - 0x53, 0x48, 0x47, 0x49, 0x46, 0x52, 0x48, 0x3e, 0x4b, 0x4e, 0x4a, 0x50, - 0x4f, 0x47, 0x3e, 0x2e, 0x4b, 0x51, 0x4a, 0x44, 0x4c, 0x49, 0x4f, 0x26, - 0x48, 0x4f, 0x44, 0x51, 0x48, 0x3f, 0x4c, 0x30, 0x4e, 0x48, 0x4d, 0x48, - 0x48, 0x44, 0x4b, 0x2f, 0x50, 0x41, 0x4d, 0x50, 0x52, 0x42, 0x45, 0x33, - 0x4c, 0x48, 0x48, 0x3d, 0x46, 0x41, 0x43, 0x38, 0x45, 0x4f, 0x48, 0x4b, - 0x41, 0x49, 0x4c, 0x2f, 0x53, 0x4c, 0x48, 0x4a, 0x47, 0x40, 0x4a, 0x31, - 0x52, 0x40, 0x49, 0x4c, 0x3f, 0x48, 0x48, 0x39, 0x48, 0x3f, 0x45, 0x43, - 0x40, 0x48, 0x3c, 0x40, 0x4c, 0x48, 0x48, 0x4d, 0x3e, 0x42, 0x4a, 0x3d, - 0x4c, 0x45, 0x44, 0x46, 0x44, 0x45, 0x4a, 0x47, 0x52, 0x48, 0x4a, 0x4d, - 0x3f, 0x49, 0x4c, 0x4c, 0x48, 0x44, 0x4c, 0x44, 0x3d, 0x41, 0x47, 0x45, - 0x43, 0x4a, 0x5a, 0x3f, 0x48, 0x5d, 0x50, 0x35, 0x47, 0x4f, 0x5b, 0x46, - 0x6e, 0x50, 0x6d, 0x44, 0x49, 0x6a, 0x53, 0x6b, 0x4b, 0x4b, 0x4f, 0x62, - 0x45, 0x57, 0x48, 0x5b, 0x40, 0x4b, 0x4f, 0x63, 0x48, 0x3a, 0x4b, 0x42, - 0x43, 0x53, 0x41, 0x5f, 0x54, 0x3e, 0x4d, 0x43, 0x3d, 0x4c, 0x46, 0x46, - 0x49, 0x56, 0x4b, 0x45, 0x47, 0x45, 0x4e, 0x4f, 0x4c, 0x4d, 0x4f, 0x47, - 0x49, 0x4b, 0x51, 0x33, 0x4b, 0x45, 0x4d, 0x41, 0x51, 0x4a, 0x43, 0x2a, - 0x50, 0x4b, 0x4a, 0x4b, 0x4c, 0x52, 0x4c, 0x3b, 0x45, 0x4c, 0x51, 0x44, - 0x4c, 0x48, 0x43, 0x35, 0x51, 0x50, 0x48, 0x49, 0x3f, 0x48, 0x3d, 0x3b, - 0x52, 0x3f, 0x42, 0x4b, 0x49, 0x49, 0x47, 0x38, 0x4a, 0x4a, 0x41, 0x52, - 0x41, 0x3e, 0x4b, 0x2f, 0x46, 0x4d, 0x49, 0x44, 0x46, 0x3b, 0x47, 0x36, - 0x46, 0x3f, 0x49, 0x48, 0x47, 0x42, 0x42, 0x35, 0x44, 0x4b, 0x4d, 0x56, - 0x50, 0x49, 0x43, 0x42, 0x4b, 0x3e, 0x53, 0x44, 0x4a, 0x43, 0x47, 0x38, - 0x4a, 0x45, 0x4d, 0x3f, 0x46, 0x4a, 0x47, 0x3a, 0x4c, 0x3e, 0x47, 0x45, - 0x46, 0x4b, 0x45, 0x49, 0x4a, 0x4b, 0x54, 0x49, 0x4a, 
0x53, 0x4a, 0x4c, - 0x45, 0x48, 0x53, 0x42, 0x4b, 0x47, 0x4e, 0x50, 0x3d, 0x51, 0x60, 0x3e, - 0x53, 0x5d, 0x51, 0x30, 0x45, 0x50, 0x59, 0x4e, 0x62, 0x52, 0x68, 0x51, - 0x45, 0x6c, 0x4c, 0x64, 0x4d, 0x47, 0x55, 0x61, 0x44, 0x57, 0x44, 0x58, - 0x44, 0x4a, 0x53, 0x58, 0x47, 0x31, 0x3f, 0x4c, 0x43, 0x45, 0x48, 0x5e, - 0x41, 0x43, 0x3f, 0x43, 0x51, 0x46, 0x48, 0x4b, 0x4d, 0x5b, 0x45, 0x4b, - 0x48, 0x46, 0x3f, 0x45, 0x47, 0x45, 0x40, 0x4a, 0x51, 0x51, 0x3d, 0x3f, - 0x43, 0x45, 0x4d, 0x4a, 0x47, 0x50, 0x49, 0x32, 0x4c, 0x5a, 0x55, 0x4f, - 0x4c, 0x51, 0x43, 0x37, 0x40, 0x59, 0x49, 0x49, 0x4e, 0x4f, 0x47, 0x34, - 0x40, 0x4c, 0x4a, 0x41, 0x4a, 0x47, 0x4a, 0x42, 0x4e, 0x4a, 0x48, 0x4e, - 0x4e, 0x4e, 0x45, 0x39, 0x4e, 0x45, 0x45, 0x4e, 0x4c, 0x48, 0x4a, 0x35, - 0x45, 0x4c, 0x49, 0x4f, 0x51, 0x43, 0x3c, 0x3a, 0x4a, 0x4a, 0x46, 0x48, - 0x49, 0x42, 0x4e, 0x2f, 0x42, 0x4e, 0x45, 0x50, 0x51, 0x40, 0x45, 0x32, - 0x4a, 0x4d, 0x44, 0x4e, 0x48, 0x48, 0x47, 0x2f, 0x48, 0x4b, 0x49, 0x44, - 0x48, 0x4d, 0x46, 0x3b, 0x46, 0x4a, 0x41, 0x4e, 0x4e, 0x47, 0x54, 0x4b, - 0x45, 0x49, 0x45, 0x44, 0x45, 0x48, 0x4a, 0x46, 0x55, 0x49, 0x47, 0x49, - 0x4b, 0x42, 0x48, 0x4f, 0x3f, 0x52, 0x60, 0x39, 0x4b, 0x5e, 0x55, 0x2e, - 0x48, 0x50, 0x59, 0x4f, 0x68, 0x5f, 0x64, 0x4f, 0x3b, 0x71, 0x50, 0x63, - 0x4f, 0x50, 0x50, 0x6c, 0x4b, 0x55, 0x47, 0x5b, 0x4c, 0x40, 0x48, 0x59, - 0x4f, 0x2e, 0x4b, 0x4c, 0x4e, 0x4e, 0x46, 0x61, 0x50, 0x41, 0x4c, 0x4a, - 0x44, 0x3e, 0x3f, 0x47, 0x4b, 0x4f, 0x47, 0x4b, 0x47, 0x3d, 0x41, 0x49, - 0x49, 0x3f, 0x4d, 0x44, 0x4a, 0x4d, 0x45, 0x41, 0x4d, 0x43, 0x49, 0x3c, - 0x49, 0x57, 0x49, 0x3b, 0x49, 0x59, 0x3f, 0x4f, 0x4e, 0x49, 0x4e, 0x46, - 0x52, 0x4e, 0x4c, 0x54, 0x4a, 0x48, 0x48, 0x3a, 0x44, 0x4a, 0x4f, 0x4a, - 0x44, 0x4b, 0x43, 0x4d, 0x51, 0x42, 0x53, 0x4d, 0x52, 0x41, 0x4d, 0x43, - 0x4e, 0x54, 0x4b, 0x42, 0x4b, 0x3f, 0x53, 0x45, 0x3f, 0x4a, 0x45, 0x50, - 0x3f, 0x4c, 0x4f, 0x43, 0x46, 0x42, 0x4b, 0x4d, 0x4c, 0x3b, 0x48, 0x40, - 0x4e, 0x4e, 0x49, 0x46, 0x4d, 0x4d, 0x52, 0x40, 0x4e, 
0x4f, 0x46, 0x4a, - 0x40, 0x4b, 0x4c, 0x40, 0x4f, 0x4a, 0x44, 0x41, 0x46, 0x3c, 0x40, 0x3d, - 0x44, 0x48, 0x4a, 0x50, 0x46, 0x53, 0x46, 0x40, 0x44, 0x3e, 0x47, 0x43, - 0x48, 0x3d, 0x4e, 0x3e, 0x48, 0x49, 0x4b, 0x49, 0x4c, 0x3e, 0x4c, 0x4a, - 0x46, 0x4e, 0x62, 0x3c, 0x59, 0x60, 0x51, 0x29, 0x47, 0x52, 0x59, 0x4c, - 0x67, 0x68, 0x68, 0x4e, 0x3b, 0x72, 0x4d, 0x68, 0x44, 0x4f, 0x53, 0x63, - 0x47, 0x5a, 0x45, 0x4f, 0x4b, 0x37, 0x43, 0x5b, 0x4b, 0x3d, 0x44, 0x41, - 0x4a, 0x4b, 0x3c, 0x64, 0x48, 0x38, 0x42, 0x3f, 0x48, 0x46, 0x4b, 0x46, - 0x46, 0x4f, 0x46, 0x46, 0x44, 0x3c, 0x4b, 0x4f, 0x4d, 0x4a, 0x4b, 0x46, - 0x4d, 0x4f, 0x4f, 0x3f, 0x3a, 0x4b, 0x55, 0x3c, 0x51, 0x56, 0x4d, 0x42, - 0x52, 0x5a, 0x3e, 0x4b, 0x54, 0x57, 0x4e, 0x4d, 0x4e, 0x5b, 0x4e, 0x49, - 0x4e, 0x3c, 0x40, 0x41, 0x40, 0x4d, 0x48, 0x42, 0x49, 0x4e, 0x4f, 0x47, - 0x47, 0x48, 0x50, 0x49, 0x51, 0x46, 0x44, 0x45, 0x49, 0x46, 0x43, 0x48, - 0x48, 0x49, 0x4d, 0x4c, 0x45, 0x4f, 0x4c, 0x45, 0x44, 0x40, 0x49, 0x45, - 0x49, 0x51, 0x4b, 0x4b, 0x50, 0x4b, 0x48, 0x3d, 0x4e, 0x52, 0x4a, 0x47, - 0x49, 0x41, 0x55, 0x3d, 0x48, 0x4d, 0x49, 0x48, 0x4e, 0x4c, 0x48, 0x3d, - 0x3f, 0x4c, 0x4e, 0x53, 0x3e, 0x48, 0x4a, 0x3f, 0x54, 0x4d, 0x54, 0x4b, - 0x47, 0x4e, 0x44, 0x48, 0x49, 0x4b, 0x4c, 0x49, 0x4d, 0x42, 0x52, 0x4b, - 0x40, 0x3e, 0x54, 0x49, 0x55, 0x45, 0x47, 0x4d, 0x45, 0x5c, 0x60, 0x40, - 0x57, 0x60, 0x5b, 0x27, 0x4a, 0x5a, 0x64, 0x53, 0x6a, 0x5a, 0x5f, 0x52, - 0x3a, 0x72, 0x4b, 0x5f, 0x45, 0x56, 0x5f, 0x5f, 0x54, 0x5f, 0x39, 0x52, - 0x51, 0x3e, 0x3b, 0x5a, 0x44, 0x32, 0x46, 0x50, 0x3a, 0x4f, 0x44, 0x5d, - 0x4c, 0x41, 0x39, 0x3f, 0x45, 0x46, 0x3b, 0x43, 0x46, 0x51, 0x3c, 0x4c, - 0x4b, 0x43, 0x4b, 0x51, 0x43, 0x48, 0x4d, 0x43, 0x38, 0x46, 0x46, 0x43, - 0x44, 0x4a, 0x46, 0x49, 0x48, 0x50, 0x4e, 0x4a, 0x4e, 0x58, 0x4a, 0x49, - 0x48, 0x4f, 0x4a, 0x49, 0x41, 0x57, 0x51, 0x50, 0x4b, 0x48, 0x47, 0x4b, - 0x53, 0x3d, 0x4b, 0x4c, 0x4b, 0x4b, 0x55, 0x56, 0x45, 0x49, 0x46, 0x4c, - 0x45, 0x51, 0x47, 0x50, 0x40, 0x4b, 0x4f, 0x4b, 0x4d, 
0x4a, 0x4f, 0x50, - 0x49, 0x53, 0x50, 0x46, 0x40, 0x48, 0x4a, 0x4a, 0x49, 0x4a, 0x42, 0x45, - 0x4b, 0x45, 0x42, 0x45, 0x4e, 0x4e, 0x44, 0x41, 0x4b, 0x4a, 0x49, 0x3f, - 0x41, 0x51, 0x48, 0x4c, 0x40, 0x41, 0x51, 0x42, 0x49, 0x49, 0x48, 0x42, - 0x48, 0x4c, 0x4b, 0x3c, 0x49, 0x45, 0x42, 0x49, 0x4c, 0x46, 0x45, 0x43, - 0x43, 0x48, 0x48, 0x41, 0x43, 0x42, 0x4c, 0x4b, 0x40, 0x45, 0x44, 0x46, - 0x4c, 0x4b, 0x4e, 0x4d, 0x3f, 0x59, 0x55, 0x41, 0x56, 0x5a, 0x51, 0x30, - 0x49, 0x5a, 0x63, 0x4d, 0x61, 0x5b, 0x64, 0x55, 0x34, 0x7a, 0x4c, 0x62, - 0x3e, 0x5d, 0x56, 0x60, 0x48, 0x61, 0x3f, 0x54, 0x46, 0x40, 0x42, 0x56, - 0x52, 0x35, 0x4c, 0x59, 0x45, 0x4c, 0x42, 0x60, 0x49, 0x3f, 0x4c, 0x3c, - 0x52, 0x36, 0x46, 0x3d, 0x58, 0x4b, 0x41, 0x48, 0x3e, 0x45, 0x4e, 0x54, - 0x4c, 0x56, 0x47, 0x44, 0x39, 0x4a, 0x4a, 0x4a, 0x46, 0x48, 0x4a, 0x48, - 0x51, 0x4f, 0x4b, 0x49, 0x45, 0x4b, 0x44, 0x4c, 0x3e, 0x4c, 0x42, 0x59, - 0x47, 0x55, 0x47, 0x47, 0x41, 0x44, 0x44, 0x4a, 0x44, 0x4b, 0x44, 0x46, - 0x49, 0x5a, 0x48, 0x5d, 0x4f, 0x4a, 0x47, 0x50, 0x48, 0x4e, 0x44, 0x57, - 0x49, 0x46, 0x42, 0x4d, 0x3d, 0x4a, 0x4a, 0x58, 0x41, 0x4d, 0x3c, 0x47, - 0x42, 0x4e, 0x4d, 0x49, 0x44, 0x4b, 0x4c, 0x4b, 0x53, 0x42, 0x4a, 0x46, - 0x4e, 0x56, 0x4b, 0x47, 0x50, 0x43, 0x4f, 0x48, 0x49, 0x50, 0x48, 0x50, - 0x42, 0x4c, 0x4e, 0x3c, 0x41, 0x4f, 0x4a, 0x41, 0x44, 0x47, 0x4c, 0x42, - 0x51, 0x4f, 0x53, 0x46, 0x4c, 0x4b, 0x48, 0x51, 0x47, 0x4b, 0x4c, 0x4d, - 0x4d, 0x49, 0x3d, 0x44, 0x4b, 0x42, 0x43, 0x49, 0x51, 0x47, 0x4c, 0x4b, - 0x4a, 0x50, 0x5b, 0x43, 0x5b, 0x68, 0x54, 0x31, 0x4c, 0x5d, 0x5c, 0x54, - 0x63, 0x5a, 0x61, 0x54, 0x3d, 0x7a, 0x51, 0x5b, 0x40, 0x59, 0x5a, 0x62, - 0x4c, 0x5e, 0x42, 0x58, 0x49, 0x3c, 0x38, 0x50, 0x54, 0x37, 0x42, 0x51, - 0x4d, 0x4f, 0x42, 0x68, 0x4a, 0x40, 0x4e, 0x40, 0x3f, 0x3e, 0x3f, 0x40, - 0x54, 0x52, 0x3e, 0x43, 0x46, 0x4a, 0x48, 0x51, 0x4e, 0x4d, 0x42, 0x47, - 0x3f, 0x51, 0x47, 0x44, 0x3f, 0x4c, 0x46, 0x47, 0x4f, 0x55, 0x4b, 0x4e, - 0x4c, 0x51, 0x40, 0x51, 0x47, 0x4a, 0x44, 0x5c, 0x48, 
0x54, 0x4b, 0x46, - 0x49, 0x4b, 0x53, 0x59, 0x43, 0x3e, 0x45, 0x4e, 0x4f, 0x58, 0x4b, 0x64, - 0x41, 0x4b, 0x45, 0x4a, 0x4c, 0x51, 0x47, 0x57, 0x45, 0x46, 0x43, 0x4f, - 0x4d, 0x4d, 0x49, 0x58, 0x4b, 0x52, 0x43, 0x4b, 0x45, 0x4c, 0x50, 0x4c, - 0x4e, 0x4b, 0x40, 0x4c, 0x44, 0x4e, 0x4c, 0x47, 0x41, 0x55, 0x45, 0x4a, - 0x4c, 0x48, 0x46, 0x41, 0x47, 0x52, 0x44, 0x4f, 0x48, 0x49, 0x4b, 0x47, - 0x50, 0x4f, 0x42, 0x4a, 0x44, 0x4b, 0x52, 0x43, 0x45, 0x4e, 0x46, 0x49, - 0x45, 0x52, 0x51, 0x45, 0x44, 0x41, 0x4c, 0x46, 0x4c, 0x4b, 0x44, 0x4d, - 0x4f, 0x48, 0x44, 0x4d, 0x56, 0x48, 0x50, 0x4f, 0x3b, 0x4e, 0x55, 0x43, - 0x52, 0x62, 0x57, 0x2c, 0x4d, 0x5e, 0x5e, 0x50, 0x64, 0x5b, 0x6a, 0x55, - 0x39, 0x7d, 0x4b, 0x5e, 0x43, 0x54, 0x5d, 0x5c, 0x4d, 0x5c, 0x42, 0x51, - 0x4c, 0x3d, 0x46, 0x51, 0x4c, 0x2a, 0x3e, 0x54, 0x47, 0x48, 0x46, 0x64, - 0x42, 0x3d, 0x47, 0x3f, 0x42, 0x45, 0x49, 0x3b, 0x59, 0x50, 0x4c, 0x46, - 0x4d, 0x44, 0x47, 0x4d, 0x4a, 0x50, 0x41, 0x48, 0x43, 0x50, 0x3e, 0x44, - 0x4b, 0x53, 0x48, 0x49, 0x51, 0x51, 0x4d, 0x57, 0x49, 0x4f, 0x53, 0x50, - 0x46, 0x4f, 0x41, 0x5d, 0x47, 0x46, 0x49, 0x51, 0x45, 0x41, 0x4a, 0x56, - 0x4f, 0x4e, 0x4d, 0x4a, 0x3e, 0x55, 0x47, 0x65, 0x48, 0x51, 0x4d, 0x4e, - 0x46, 0x43, 0x48, 0x5b, 0x48, 0x4f, 0x4f, 0x48, 0x4b, 0x4d, 0x4e, 0x5c, - 0x4f, 0x4c, 0x54, 0x48, 0x4a, 0x4d, 0x4e, 0x4e, 0x44, 0x48, 0x43, 0x52, - 0x41, 0x52, 0x48, 0x4f, 0x46, 0x4f, 0x51, 0x41, 0x44, 0x45, 0x41, 0x4b, - 0x43, 0x4e, 0x4e, 0x42, 0x48, 0x41, 0x45, 0x43, 0x44, 0x43, 0x4c, 0x4c, - 0x51, 0x54, 0x4c, 0x32, 0x46, 0x52, 0x4e, 0x49, 0x40, 0x4d, 0x43, 0x4f, - 0x4a, 0x4d, 0x4d, 0x49, 0x46, 0x4c, 0x41, 0x4d, 0x41, 0x3a, 0x50, 0x4c, - 0x5a, 0x4e, 0x49, 0x53, 0x4d, 0x53, 0x53, 0x3d, 0x52, 0x64, 0x55, 0x2a, - 0x47, 0x5d, 0x61, 0x51, 0x5b, 0x5d, 0x66, 0x52, 0x3f, 0xfd, 0x55, 0x5a, - 0x4b, 0x54, 0x5b, 0x60, 0x49, 0x5d, 0x43, 0x57, 0x47, 0x41, 0x45, 0x5e, - 0x4c, 0x28, 0x3e, 0x40, 0x49, 0x4e, 0x40, 0x69, 0x4a, 0x44, 0x45, 0x43, - 0x45, 0x3d, 0x39, 0x40, 0x4c, 0x53, 0x4b, 0x3d, 0x4e, 
0x43, 0x48, 0x55, - 0x4d, 0x50, 0x4d, 0x49, 0x4f, 0x48, 0x3e, 0x46, 0x47, 0x56, 0x40, 0x48, - 0x46, 0x53, 0x50, 0x5d, 0x43, 0x54, 0x49, 0x47, 0x49, 0x4c, 0x48, 0x5d, - 0x49, 0x51, 0x50, 0x3d, 0x41, 0x47, 0x48, 0x64, 0x4b, 0x44, 0x49, 0x41, - 0x54, 0x48, 0x3d, 0x6b, 0x4c, 0x5a, 0x48, 0x4e, 0x40, 0x4c, 0x52, 0x5f, - 0x54, 0x4a, 0x3f, 0x48, 0x43, 0x43, 0x44, 0x66, 0x49, 0x47, 0x43, 0x46, - 0x47, 0x54, 0x42, 0x54, 0x4b, 0x4e, 0x49, 0x49, 0x49, 0x4b, 0x52, 0x4f, - 0x43, 0x46, 0x4b, 0x49, 0x54, 0x4b, 0x40, 0x48, 0x47, 0x4a, 0x46, 0x47, - 0x44, 0x47, 0x4c, 0x37, 0x3f, 0x49, 0x45, 0x44, 0x50, 0x49, 0x44, 0x36, - 0x4d, 0x40, 0x45, 0x49, 0x53, 0x55, 0x44, 0x42, 0x47, 0x48, 0x46, 0x40, - 0x4f, 0x4c, 0x41, 0x42, 0x52, 0x3a, 0x43, 0x46, 0x55, 0x51, 0x4e, 0x4f, - 0x48, 0x51, 0x55, 0x48, 0x52, 0x66, 0x4e, 0x33, 0x49, 0x5b, 0x5f, 0x4b, - 0x5f, 0x5b, 0x66, 0x52, 0x41, 0x7c, 0x4a, 0x59, 0x47, 0x59, 0x58, 0x67, - 0x49, 0x5e, 0x44, 0x57, 0x49, 0x4c, 0x43, 0x56, 0x41, 0x27, 0x4c, 0x44, - 0x51, 0x44, 0x42, 0x65, 0x49, 0x44, 0x40, 0x3d, 0x4d, 0x3e, 0x4c, 0x3c, - 0x4f, 0x4b, 0x45, 0x44, 0x4d, 0x48, 0x47, 0x54, 0x4d, 0x4e, 0x44, 0x42, - 0x47, 0x44, 0x3d, 0x49, 0x4e, 0x50, 0x49, 0x45, 0x58, 0x4a, 0x54, 0x5c, - 0x41, 0x49, 0x4f, 0x42, 0x44, 0x4f, 0x4a, 0x62, 0x48, 0x50, 0x48, 0x43, - 0x51, 0x53, 0x47, 0x6c, 0x40, 0x46, 0x3d, 0x46, 0x4a, 0x50, 0x43, 0x69, - 0x49, 0x4f, 0x4a, 0x4c, 0x49, 0x46, 0x43, 0x6a, 0x48, 0x50, 0x49, 0x48, - 0x48, 0x51, 0x4b, 0x65, 0x42, 0x4b, 0x4d, 0x48, 0x44, 0x4e, 0x49, 0x60, - 0x44, 0x52, 0x42, 0x42, 0x47, 0x48, 0x4b, 0x51, 0x50, 0x4b, 0x3c, 0x4d, - 0x4c, 0x44, 0x48, 0x55, 0x51, 0x4c, 0x55, 0x4e, 0x52, 0x4c, 0x4b, 0x39, - 0x48, 0x42, 0x49, 0x49, 0x49, 0x50, 0x49, 0x32, 0x4e, 0x4b, 0x45, 0x4f, - 0x42, 0x4b, 0x47, 0x50, 0x48, 0x45, 0x54, 0x49, 0x4c, 0x46, 0x40, 0x46, - 0x43, 0x3d, 0x51, 0x44, 0x53, 0x4f, 0x54, 0x55, 0x43, 0x4f, 0x5b, 0x47, - 0x53, 0x6c, 0x57, 0x2e, 0x50, 0x55, 0x5a, 0x4d, 0x57, 0x5d, 0x70, 0x50, - 0x3f, 0x79, 0x4a, 0x5a, 0x4c, 0x58, 0x59, 0x63, 0x45, 
0x69, 0x48, 0x58, - 0x42, 0x4b, 0x43, 0x5c, 0x46, 0x28, 0x48, 0x49, 0x4c, 0x3f, 0x45, 0x58, - 0x45, 0x44, 0x47, 0x40, 0x4c, 0x42, 0x3e, 0x37, 0x45, 0x54, 0x48, 0x3b, - 0x4e, 0x48, 0x43, 0x4a, 0x50, 0x4a, 0x49, 0x46, 0x4c, 0x54, 0x3f, 0x4b, - 0x4e, 0x56, 0x48, 0x49, 0x49, 0x4c, 0x51, 0x5f, 0x4d, 0x4b, 0x43, 0x4d, - 0x47, 0x51, 0x43, 0x59, 0x45, 0x4e, 0x4f, 0x45, 0x44, 0x54, 0x44, 0x6d, - 0x47, 0x51, 0x43, 0x4e, 0x4c, 0x4f, 0x43, 0x6d, 0x48, 0x53, 0x4b, 0x47, - 0x49, 0x48, 0x46, 0x6a, 0x51, 0x4c, 0x4d, 0x45, 0x4e, 0x47, 0x46, 0x62, - 0x4a, 0x54, 0x51, 0x4c, 0x47, 0x4d, 0x4a, 0x61, 0x3d, 0x50, 0x4c, 0x4c, - 0x45, 0x3f, 0x3e, 0x54, 0x3d, 0x53, 0x48, 0x47, 0x52, 0x4b, 0x47, 0x51, - 0x4f, 0x45, 0x4b, 0x4a, 0x4c, 0x46, 0x44, 0x37, 0x42, 0x50, 0x49, 0x4f, - 0x51, 0x41, 0x44, 0x38, 0x54, 0x40, 0x51, 0x52, 0x3e, 0x43, 0x44, 0x47, - 0x49, 0x4b, 0x4b, 0x46, 0x53, 0x54, 0x55, 0x4b, 0x4a, 0x37, 0x43, 0x4a, - 0x51, 0x47, 0x51, 0x54, 0x43, 0x46, 0x56, 0x3d, 0x54, 0x66, 0x4f, 0x30, - 0x45, 0x52, 0x5a, 0x43, 0x5c, 0x65, 0x5d, 0x52, 0x32, 0x77, 0x53, 0x5f, - 0x4a, 0x5a, 0x4f, 0x5e, 0x4e, 0x61, 0x4b, 0x5b, 0x4a, 0x53, 0x3e, 0x61, - 0x47, 0x24, 0x3e, 0x48, 0x4d, 0x43, 0x40, 0x53, 0x4e, 0x41, 0x43, 0x3d, - 0x50, 0x49, 0x41, 0x3a, 0x4e, 0x4b, 0x48, 0x49, 0x48, 0x49, 0x46, 0x50, - 0x4f, 0x4b, 0x47, 0x4b, 0x48, 0x52, 0x3e, 0x4d, 0x4d, 0x59, 0x4c, 0x3e, - 0x52, 0x49, 0x4f, 0x5e, 0x54, 0x59, 0x47, 0x4d, 0x40, 0x4c, 0x4b, 0x64, - 0x42, 0x4c, 0x53, 0x46, 0x4e, 0x50, 0x46, 0x6a, 0x41, 0x59, 0x44, 0x4b, - 0x4f, 0x44, 0x52, 0x6c, 0x54, 0x4e, 0x46, 0x48, 0x42, 0x3d, 0x44, 0x67, - 0x44, 0x4f, 0x47, 0x54, 0x4c, 0x4f, 0x43, 0x61, 0x4c, 0x54, 0x4f, 0x43, - 0x49, 0x40, 0x4a, 0x5f, 0x4a, 0x52, 0x47, 0x43, 0x4c, 0x43, 0x49, 0x53, - 0x4c, 0x4b, 0x43, 0x3d, 0x4e, 0x45, 0x49, 0x50, 0x44, 0x53, 0x4f, 0x48, - 0x4b, 0x46, 0x44, 0x3c, 0x50, 0x42, 0x43, 0x40, 0x47, 0x43, 0x42, 0x34, - 0x47, 0x42, 0x3f, 0x4a, 0x48, 0x42, 0x48, 0x4c, 0x42, 0x4c, 0x4e, 0x47, - 0x48, 0x47, 0x51, 0x51, 0x4d, 0x3d, 0x3e, 0x4b, 0x54, 
0x4c, 0x4c, 0x59, - 0x4f, 0x50, 0x57, 0x3c, 0x54, 0x62, 0x54, 0x35, 0x3d, 0x5a, 0x5b, 0x47, - 0x59, 0x63, 0x66, 0x4d, 0x3c, 0x79, 0x50, 0x5f, 0x45, 0x58, 0x4e, 0x5d, - 0x48, 0x61, 0x43, 0x54, 0x47, 0x54, 0x4d, 0x54, 0x4b, 0x25, 0x41, 0x44, - 0x4c, 0x4a, 0x3b, 0x52, 0x47, 0x3c, 0x45, 0x3c, 0x53, 0x44, 0x44, 0x40, - 0x50, 0x4c, 0x45, 0x3a, 0x4c, 0x51, 0x44, 0x49, 0x4d, 0x52, 0x4d, 0x4b, - 0x45, 0x52, 0x3d, 0x50, 0x4a, 0x58, 0x4a, 0x47, 0x4d, 0x47, 0x4e, 0x52, - 0x4f, 0x4d, 0x4f, 0x49, 0x52, 0x52, 0x4c, 0x5e, 0x47, 0x4d, 0x46, 0x4d, - 0x4c, 0x48, 0x50, 0x70, 0x41, 0x4a, 0x48, 0x3d, 0x45, 0x48, 0x45, 0x74, - 0x47, 0x4c, 0x43, 0x4f, 0x4a, 0x4a, 0x40, 0x68, 0x52, 0x49, 0x3e, 0x3e, - 0x4e, 0x4b, 0x4b, 0x69, 0x42, 0x4f, 0x45, 0x47, 0x3f, 0x45, 0x46, 0x56, - 0x45, 0x4a, 0x47, 0x44, 0x52, 0x4b, 0x53, 0x4e, 0x4e, 0x46, 0x45, 0x40, - 0x47, 0x4b, 0x53, 0x52, 0x53, 0x51, 0x4f, 0x46, 0x42, 0x43, 0x50, 0x3e, - 0x48, 0x4e, 0x41, 0x53, 0x4d, 0x48, 0x48, 0x33, 0x40, 0x43, 0x4b, 0x42, - 0x52, 0x4c, 0x42, 0x4e, 0x41, 0x4e, 0x4f, 0x50, 0x43, 0x49, 0x4d, 0x47, - 0x4a, 0x3a, 0x3f, 0x51, 0x51, 0x44, 0x4e, 0x54, 0x40, 0x55, 0x59, 0x3c, - 0x57, 0x67, 0x4e, 0x2e, 0x4c, 0x5b, 0x5b, 0x51, 0x58, 0x63, 0x62, 0x52, - 0x3c, 0x72, 0x51, 0x5a, 0x4e, 0x53, 0x4a, 0x5c, 0x51, 0x69, 0x42, 0x51, - 0x48, 0x54, 0x48, 0x57, 0x3e, 0x37, 0x3f, 0x4d, 0x4d, 0x4a, 0x35, 0x57, - 0x4e, 0x40, 0x45, 0x4a, 0x45, 0x4e, 0x49, 0x40, 0x49, 0x53, 0x51, 0x44, - 0x4a, 0x50, 0x4b, 0x4b, 0x50, 0x4f, 0x3e, 0x44, 0x45, 0x44, 0x4c, 0x51, - 0x47, 0x51, 0x46, 0x42, 0x48, 0x50, 0x49, 0x4d, 0x43, 0x54, 0x52, 0x4d, - 0x4e, 0x4f, 0x3f, 0x63, 0x54, 0x57, 0x41, 0x44, 0x4e, 0x50, 0x4e, 0x66, - 0x41, 0x53, 0x4b, 0x4d, 0x4e, 0x4f, 0x43, 0x6d, 0x4e, 0x51, 0x49, 0x4f, - 0x49, 0x4a, 0x4a, 0x6c, 0x4b, 0x4f, 0x3d, 0x47, 0x4d, 0x51, 0x3c, 0x66, - 0x4b, 0x56, 0x3e, 0x4c, 0x41, 0x46, 0x45, 0x68, 0x47, 0x4b, 0x4a, 0x54, - 0x53, 0x48, 0x51, 0x59, 0x45, 0x43, 0x50, 0x45, 0x4f, 0x45, 0x42, 0x55, - 0x48, 0x52, 0x4c, 0x46, 0x52, 0x49, 0x47, 0x3d, 0x55, 
0x48, 0x52, 0x52, - 0x40, 0x4e, 0x47, 0x31, 0x45, 0x4f, 0x42, 0x4a, 0x4e, 0x50, 0x42, 0x4a, - 0x49, 0x57, 0x46, 0x4b, 0x45, 0x4e, 0x4d, 0x46, 0x47, 0x43, 0x50, 0x4e, - 0x4f, 0x4c, 0x53, 0x55, 0x45, 0x51, 0x5b, 0x3a, 0x52, 0x64, 0x54, 0x2d, - 0x42, 0x59, 0x59, 0x45, 0x59, 0x67, 0x69, 0x53, 0x3f, 0x78, 0x50, 0x60, - 0x4c, 0x4c, 0x5b, 0x53, 0x45, 0x63, 0x49, 0x63, 0x51, 0x4c, 0x41, 0x4e, - 0x4b, 0x37, 0x45, 0x4e, 0x48, 0x4c, 0x39, 0x55, 0x44, 0x37, 0x3c, 0x49, - 0x44, 0x56, 0x3e, 0x40, 0x4d, 0x45, 0x4c, 0x43, 0x42, 0x41, 0x40, 0x42, - 0x57, 0x4f, 0x43, 0x3f, 0x52, 0x53, 0x51, 0x4b, 0x4b, 0x55, 0x46, 0x40, - 0x49, 0x45, 0x40, 0x4f, 0x47, 0x58, 0x4b, 0x53, 0x4e, 0x52, 0x54, 0x5e, - 0x4b, 0x51, 0x50, 0x44, 0x50, 0x4b, 0x4f, 0x70, 0x49, 0x4f, 0x4c, 0x50, - 0x45, 0x56, 0x4b, 0x6b, 0x49, 0x52, 0x4a, 0x3f, 0x44, 0x4b, 0x48, 0x72, - 0x4c, 0x47, 0x4e, 0x43, 0x46, 0x4c, 0x4f, 0x61, 0x4a, 0x52, 0x52, 0x46, - 0x4a, 0x4d, 0x46, 0x65, 0x48, 0x4e, 0x4d, 0x4e, 0x46, 0x4e, 0x53, 0x59, - 0x43, 0x49, 0x43, 0x47, 0x45, 0x47, 0x53, 0x50, 0x3e, 0x4d, 0x41, 0x46, - 0x4c, 0x4a, 0x4c, 0x35, 0x3f, 0x4f, 0x50, 0x48, 0x47, 0x4d, 0x4c, 0x32, - 0x45, 0x53, 0x43, 0x4d, 0x4e, 0x4a, 0x3e, 0x4b, 0x55, 0x4f, 0x53, 0x4c, - 0x4a, 0x4d, 0x48, 0x53, 0x4f, 0x3a, 0x47, 0x4b, 0x4e, 0x4e, 0x51, 0x59, - 0x41, 0x50, 0x57, 0x38, 0x5d, 0x63, 0x59, 0x2b, 0x45, 0x53, 0x5a, 0x4e, - 0x5c, 0x60, 0x5e, 0x4c, 0x41, 0x6f, 0x53, 0x5c, 0x48, 0x53, 0x56, 0x54, - 0x4b, 0x62, 0x46, 0x63, 0x47, 0x4e, 0x40, 0x51, 0x43, 0x36, 0x44, 0x42, - 0x46, 0x51, 0x41, 0x54, 0x4e, 0x36, 0x40, 0x4b, 0x55, 0x49, 0x40, 0x3f, - 0x4b, 0x42, 0x4a, 0x4a, 0x48, 0x47, 0x40, 0x43, 0x4d, 0x4f, 0x55, 0x3f, - 0x53, 0x42, 0x4d, 0x56, 0x49, 0x51, 0x4f, 0x41, 0x3b, 0x48, 0x43, 0x4e, - 0x4b, 0x5c, 0x4f, 0x45, 0x4a, 0x4c, 0x46, 0x66, 0x43, 0x45, 0x46, 0x48, - 0x4f, 0x4e, 0x40, 0x71, 0x4b, 0x4e, 0x3e, 0x42, 0x4d, 0x52, 0x42, 0x71, - 0x4c, 0x54, 0x4f, 0x3f, 0x4c, 0x43, 0x4a, 0x73, 0x48, 0x48, 0x4c, 0x4b, - 0x4c, 0x4d, 0x40, 0x72, 0x3e, 0x51, 0x49, 0x48, 0x52, 
0x53, 0x45, 0x65, - 0x52, 0x4e, 0x4f, 0x44, 0x4c, 0x43, 0x4a, 0x5e, 0x3e, 0x56, 0x46, 0x55, - 0x55, 0x43, 0x49, 0x51, 0x4f, 0x52, 0x49, 0x4d, 0x46, 0x47, 0x49, 0x3e, - 0x51, 0x49, 0x41, 0x53, 0x42, 0x47, 0x46, 0x3b, 0x4d, 0x4e, 0x48, 0x44, - 0x42, 0x48, 0x4c, 0x47, 0x42, 0x4e, 0x4a, 0x3e, 0x44, 0x54, 0x4a, 0x4d, - 0x49, 0x41, 0x41, 0x53, 0x52, 0x4c, 0x4c, 0x56, 0x49, 0x4a, 0x5a, 0x3f, - 0x5b, 0x5c, 0x59, 0x2f, 0x49, 0x52, 0x5a, 0x4e, 0x5a, 0x61, 0x67, 0x4c, - 0x41, 0x6f, 0x5a, 0x5a, 0x40, 0x5a, 0x54, 0x4e, 0x49, 0x66, 0x45, 0x5a, - 0x4a, 0x45, 0x44, 0x4b, 0x44, 0x36, 0x41, 0x4c, 0x45, 0x44, 0x3d, 0x51, - 0x3f, 0x35, 0x3c, 0x46, 0x53, 0x5c, 0x3f, 0x3e, 0x50, 0x43, 0x46, 0x4b, - 0x40, 0x54, 0x41, 0x47, 0x4b, 0x51, 0x41, 0x46, 0x4a, 0x4d, 0x51, 0x52, - 0x43, 0x58, 0x45, 0x46, 0x4e, 0x46, 0x4a, 0x4b, 0x44, 0x54, 0x4c, 0x4c, - 0x43, 0x59, 0x48, 0x61, 0x4e, 0x4f, 0x4d, 0x4d, 0x4a, 0x52, 0x4c, 0x6e, - 0x49, 0x57, 0x48, 0x4d, 0x46, 0x46, 0x4d, 0x72, 0x4a, 0x4e, 0x47, 0x44, - 0x49, 0x4f, 0x48, 0x73, 0x42, 0x40, 0x4d, 0x44, 0x4d, 0x57, 0x3e, 0x69, - 0x50, 0x52, 0x4c, 0x55, 0x46, 0x4c, 0x44, 0x5f, 0x4b, 0x4d, 0x55, 0x4c, - 0x48, 0x49, 0x4a, 0x5e, 0x47, 0x4b, 0x45, 0x53, 0x55, 0x53, 0x4d, 0x53, - 0x47, 0x5c, 0x45, 0x4e, 0x4e, 0x52, 0x4c, 0x39, 0x4b, 0x4c, 0x49, 0x46, - 0x4a, 0x4e, 0x4b, 0x33, 0x46, 0x47, 0x52, 0x41, 0x49, 0x4b, 0x4c, 0x48, - 0x51, 0x53, 0x44, 0x4c, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x4b, 0x50, 0x47, - 0x4d, 0x4b, 0x4c, 0x4f, 0x44, 0x45, 0x58, 0x3c, 0x56, 0x5a, 0x56, 0x23, - 0x4f, 0x4d, 0x5c, 0x4e, 0x59, 0x5a, 0x65, 0x43, 0x45, 0x66, 0x54, 0x5f, - 0x45, 0x5e, 0x54, 0x4f, 0x48, 0x5f, 0x44, 0x59, 0x48, 0x46, 0x47, 0x49, - 0x4d, 0x3c, 0x49, 0x54, 0x3e, 0x48, 0x43, 0x5b, 0x4a, 0x35, 0x41, 0x43, - 0x4b, 0x55, 0x43, 0x38, 0x46, 0x42, 0x4a, 0x4e, 0x54, 0x4b, 0x4d, 0x46, - 0x43, 0x4e, 0x44, 0x47, 0x56, 0x4c, 0x51, 0x57, 0x41, 0x4d, 0x43, 0x41, - 0x51, 0x47, 0x41, 0x51, 0x51, 0x4f, 0x46, 0x50, 0x52, 0x4e, 0x4d, 0x60, - 0x41, 0x49, 0x46, 0x50, 0x48, 0x56, 0x42, 0x6d, 0x40, 
0x45, 0x44, 0x55, - 0x40, 0x4e, 0x40, 0x7c, 0x47, 0x5a, 0x44, 0x44, 0x45, 0x56, 0x55, 0x71, - 0x47, 0x4b, 0x4b, 0x45, 0x4f, 0x54, 0x4c, 0x73, 0x48, 0x55, 0x44, 0x4d, - 0x4a, 0x47, 0x49, 0x5e, 0x4d, 0x52, 0x4e, 0x4c, 0x48, 0x52, 0x48, 0x58, - 0x4c, 0x5a, 0x49, 0x4b, 0x53, 0x46, 0x4d, 0x4b, 0x48, 0x53, 0x41, 0x49, - 0x4a, 0x56, 0x51, 0x3a, 0x4c, 0x4e, 0x4f, 0x51, 0x4c, 0x59, 0x47, 0x45, - 0x4f, 0x50, 0x4a, 0x4f, 0x4d, 0x3f, 0x44, 0x4e, 0x42, 0x4a, 0x4a, 0x43, - 0x46, 0x4e, 0x4c, 0x4f, 0x47, 0x47, 0x4c, 0x4b, 0x52, 0x50, 0x50, 0x4b, - 0x42, 0x45, 0x54, 0x44, 0x54, 0x59, 0x4c, 0x2b, 0x4d, 0x4c, 0x55, 0x4e, - 0x5c, 0x5b, 0x5a, 0x42, 0x47, 0x5e, 0x56, 0x59, 0x47, 0x65, 0x55, 0x4c, - 0x4c, 0x59, 0x42, 0x5a, 0x4e, 0x46, 0x4e, 0x4b, 0x53, 0x46, 0x49, 0x56, - 0x48, 0x58, 0x4b, 0x4f, 0x45, 0x38, 0x40, 0x44, 0x49, 0x51, 0x4a, 0x3b, - 0x53, 0x40, 0x40, 0x48, 0x51, 0x49, 0x44, 0x46, 0x52, 0x4b, 0x4e, 0x45, - 0x48, 0x5a, 0x4e, 0x57, 0x44, 0x53, 0x49, 0x40, 0x4c, 0x47, 0x41, 0x4f, - 0x49, 0x55, 0x46, 0x50, 0x57, 0x5b, 0x48, 0x66, 0x50, 0x49, 0x51, 0x55, - 0x55, 0x4f, 0x47, 0x72, 0x49, 0x4f, 0x41, 0x4c, 0x49, 0x42, 0x48, 0x75, - 0x4a, 0x55, 0x45, 0x4a, 0x41, 0x51, 0x41, 0x70, 0x47, 0x49, 0x42, 0x52, - 0x4f, 0x47, 0x46, 0x63, 0x4f, 0x53, 0x46, 0x4f, 0x49, 0x53, 0x52, 0x63, - 0x4c, 0x59, 0x46, 0x41, 0x49, 0x51, 0x3e, 0x53, 0x45, 0x52, 0x51, 0x40, - 0x4f, 0x4c, 0x41, 0x4c, 0x47, 0x4a, 0x46, 0x47, 0x53, 0x47, 0x48, 0x39, - 0x53, 0x4b, 0x46, 0x4b, 0x50, 0x4c, 0x41, 0x40, 0x48, 0x4e, 0x49, 0x4e, - 0x44, 0x53, 0x44, 0x4e, 0x53, 0x49, 0x49, 0x4e, 0x46, 0x3f, 0x45, 0x42, - 0x4c, 0x47, 0x42, 0x4e, 0x49, 0x4a, 0x49, 0x44, 0x51, 0x48, 0x57, 0x4c, - 0x4d, 0x60, 0x4e, 0x2d, 0x46, 0x4d, 0x58, 0x53, 0x5c, 0x56, 0x5e, 0x41, - 0x3e, 0x66, 0x53, 0x5b, 0x49, 0x59, 0x5a, 0x55, 0x4e, 0x59, 0x46, 0x4a, - 0x44, 0x42, 0x45, 0x3d, 0x4d, 0x45, 0x44, 0x4f, 0x4d, 0x53, 0x42, 0x5a, - 0x43, 0x3c, 0x48, 0x4f, 0x44, 0x59, 0x3f, 0x33, 0x45, 0x48, 0x43, 0x45, - 0x4d, 0x56, 0x48, 0x44, 0x3e, 0x48, 0x46, 0x4d, 0x44, 
0x53, 0x46, 0x4e, - 0x45, 0x52, 0x40, 0x46, 0x4c, 0x50, 0x4e, 0x4b, 0x4d, 0x46, 0x48, 0x46, - 0x50, 0x52, 0x4e, 0x57, 0x3f, 0x4a, 0x49, 0x50, 0x53, 0x4e, 0x41, 0x66, - 0x49, 0x4f, 0x40, 0x4b, 0x50, 0x4c, 0x4a, 0x70, 0x42, 0x51, 0x41, 0x4c, - 0x50, 0x4f, 0x46, 0x60, 0x45, 0x47, 0x54, 0x4c, 0x49, 0x59, 0x52, 0x61, - 0x4a, 0x53, 0x52, 0x4f, 0x4b, 0x4c, 0x46, 0x56, 0x4b, 0x54, 0x4f, 0x47, - 0x53, 0x49, 0x4f, 0x50, 0x4a, 0x54, 0x45, 0x4e, 0x47, 0x48, 0x47, 0x42, - 0x49, 0x44, 0x46, 0x46, 0x55, 0x4c, 0x4f, 0x36, 0x4c, 0x49, 0x3f, 0x4e, - 0x45, 0x4b, 0x4b, 0x36, 0x48, 0x4f, 0x4b, 0x50, 0x45, 0x47, 0x49, 0x3f, - 0x50, 0x4b, 0x52, 0x48, 0x4c, 0x41, 0x49, 0x43, 0x4e, 0x3c, 0x43, 0x45, - 0x3e, 0x45, 0x48, 0x44, 0x4d, 0x48, 0x56, 0x47, 0x4b, 0x54, 0x52, 0x2b, - 0x4d, 0x4e, 0x57, 0x4f, 0x57, 0x4f, 0x56, 0x43, 0x48, 0x5f, 0x4c, 0x51, - 0x4d, 0x58, 0x4f, 0x4e, 0x50, 0x50, 0x48, 0x4a, 0x4d, 0x3f, 0x47, 0x40, - 0x4b, 0x4a, 0x4e, 0x4b, 0x4a, 0x58, 0x42, 0x49, 0x3f, 0x42, 0x3d, 0x4d, - 0x46, 0x53, 0x45, 0x3e, 0x4e, 0x49, 0x4f, 0x4a, 0x47, 0x46, 0x40, 0x3e, - 0x4c, 0x4d, 0x4d, 0x45, 0x4a, 0x56, 0x40, 0x4a, 0x47, 0x57, 0x4f, 0x48, - 0x4f, 0x48, 0x47, 0x49, 0x4e, 0x52, 0x50, 0x48, 0x42, 0x52, 0x43, 0x5a, - 0x49, 0x42, 0x4f, 0x4f, 0x51, 0x51, 0x50, 0x5c, 0x4b, 0x43, 0x4b, 0x48, - 0x50, 0x51, 0x4b, 0x6d, 0x53, 0x4e, 0x44, 0x4c, 0x4c, 0x51, 0x46, 0x5b, - 0x44, 0x48, 0x4d, 0x4c, 0x46, 0x4f, 0x54, 0x54, 0x4e, 0x54, 0x42, 0x4e, - 0x4c, 0x49, 0x49, 0x58, 0x49, 0x53, 0x53, 0x4a, 0x4e, 0x4b, 0x47, 0x53, - 0x43, 0x55, 0x46, 0x51, 0x3d, 0x3d, 0x4c, 0x47, 0x4e, 0x51, 0x47, 0x48, - 0x4b, 0x4c, 0x42, 0x3b, 0x43, 0x4f, 0x44, 0x4d, 0x54, 0x4b, 0x4a, 0x47, - 0x4c, 0x42, 0x4b, 0x43, 0x41, 0x4e, 0x4d, 0x50, 0x45, 0x46, 0x41, 0x4a, - 0x49, 0x49, 0x54, 0x47, 0x4c, 0x4b, 0x50, 0x4e, 0x3f, 0x43, 0x40, 0x41, - 0x44, 0x54, 0x51, 0x47, 0x4c, 0x4b, 0x4f, 0x34, 0x4d, 0x4c, 0x4f, 0x49, - 0x56, 0x4e, 0x4b, 0x3e, 0x48, 0x53, 0x4e, 0x56, 0x49, 0x4e, 0x4c, 0x40, - 0x55, 0x4a, 0x46, 0x4f, 0x48, 0x4a, 0x55, 0x41, 0x55, 
0x3d, 0x47, 0x51, - 0x50, 0x51, 0x45, 0x51, 0x4b, 0x4e, 0x4a, 0x4f, 0x4b, 0x45, 0x42, 0x3c, - 0x4e, 0x46, 0x47, 0x49, 0x4a, 0x4c, 0x48, 0x41, 0x4f, 0x4a, 0x44, 0x45, - 0x4e, 0x4e, 0x43, 0x41, 0x4c, 0x47, 0x48, 0x49, 0x4c, 0x48, 0x4f, 0x4a, - 0x4f, 0x4a, 0x4b, 0x45, 0x42, 0x40, 0x52, 0x55, 0x4f, 0x49, 0x44, 0x54, - 0x49, 0x48, 0x51, 0x4d, 0x44, 0x4a, 0x4d, 0x49, 0x4e, 0x4e, 0x51, 0x5d, - 0x42, 0x4d, 0x49, 0x3f, 0x48, 0x58, 0x40, 0x5e, 0x48, 0x4f, 0x49, 0x53, - 0x45, 0x47, 0x4f, 0x53, 0x4d, 0x4f, 0x4d, 0x4d, 0x46, 0x55, 0x43, 0x51, - 0x4f, 0x51, 0x4a, 0x4e, 0x49, 0x42, 0x49, 0x50, 0x47, 0x4d, 0x42, 0x47, - 0x46, 0x50, 0x55, 0x47, 0x4d, 0x47, 0x3e, 0x51, 0x4d, 0x43, 0x44, 0x39, - 0x4e, 0x4b, 0x41, 0x48, 0x52, 0x53, 0x4d, 0x39, 0x4d, 0x51, 0x4c, 0x46, - 0x4e, 0x47, 0x49, 0x41, 0x45, 0x4a, 0x4a, 0x45, 0x50, 0x4a, 0x40, 0x48, - 0x43, 0x47, 0x44, 0x50, 0x4d, 0x47, 0x4a, 0x47, 0x45, 0x57, 0x41, 0x34, - 0x51, 0x40, 0x45, 0x44, 0x3c, 0x47, 0x46, 0x47, 0x44, 0x48, 0x42, 0x40, - 0x37, 0x53, 0x4a, 0x43, 0x49, 0x4b, 0x43, 0x44, 0x4f, 0x4f, 0x48, 0x48, - 0x53, 0x49, 0x4b, 0x48, 0x4e, 0x4c, 0x42, 0x45, 0x4c, 0x4a, 0x4a, 0x46, - 0x47, 0x57, 0x3e, 0x46, 0x46, 0x45, 0x4a, 0x43, 0x46, 0x49, 0x43, 0x52, - 0x3e, 0x48, 0x4a, 0x4b, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4b, 0x4e, 0x44, - 0x42, 0x44, 0x50, 0x41, 0x49, 0x49, 0x4d, 0x4b, 0x44, 0x46, 0x4a, 0x52, - 0x4d, 0x47, 0x49, 0x4b, 0x4d, 0x49, 0x41, 0x48, 0x4b, 0x3f, 0x45, 0x4f, - 0x51, 0x41, 0x55, 0x42, 0x49, 0x4b, 0x4b, 0x51, 0x4f, 0x4f, 0x42, 0x4e, - 0x4e, 0x4a, 0x52, 0x41, 0x4f, 0x42, 0x48, 0x3d, 0x4a, 0x44, 0x50, 0x4b, - 0x49, 0x45, 0x51, 0x46, 0x51, 0x44, 0x4d, 0x47, 0x4a, 0x4a, 0x4d, 0x49, - 0x4d, 0x48, 0x4d, 0x4f, 0x4d, 0x44, 0x48, 0x4e, 0x4a, 0x4b, 0x40, 0x4f, - 0x47, 0x3a, 0x41, 0x47, 0x4a, 0x4a, 0x4a, 0x48, 0x42, 0x41, 0x4d, 0x56, - 0x3f, 0x52, 0x4d, 0x4c, 0x44, 0x48, 0x47, 0x4e, 0x51, 0x4c, 0x49, 0x47, - 0x44, 0x4c, 0x4b, 0x47, 0x48, 0x46, 0x47, 0x4f, 0x43, 0x41, 0x3e, 0x47, - 0x53, 0x4a, 0x46, 0x42, 0x46, 0x61, 0x43, 0x30, 0x4e, 
0x52, 0x43, 0x45, - 0x32, 0x4a, 0x45, 0x48, 0x51, 0x3e, 0x44, 0x3b, 0x3a, 0x63, 0x4c, 0x46, - 0x4c, 0x49, 0x3d, 0x41, 0x52, 0x53, 0x43, 0x43, 0x45, 0x3d, 0x48, 0x40, - 0x4b, 0x4a, 0x49, 0x48, 0x4d, 0x49, 0x4b, 0x4c, 0x3f, 0x4e, 0x4b, 0x47, - 0x45, 0x4d, 0x3f, 0x4d, 0x43, 0x50, 0x48, 0x4b, 0x54, 0x3e, 0x44, 0x4e, - 0x3e, 0x4c, 0x43, 0x4b, 0x4c, 0x4b, 0x3e, 0x49, 0x50, 0x52, 0x4a, 0x4a, - 0x50, 0x50, 0x43, 0x4e, 0x49, 0x48, 0x51, 0x50, 0x47, 0x3d, 0x45, 0x4b, - 0x47, 0x46, 0x4d, 0x4c, 0x45, 0x4d, 0x4a, 0x4d, 0x42, 0x4d, 0x47, 0x4f, - 0x40, 0x43, 0x46, 0x51, 0x47, 0x4b, 0x43, 0x49, 0x49, 0x50, 0x4b, 0x4b, - 0x46, 0x4a, 0x4c, 0x48, 0x49, 0x47, 0x4b, 0x56, 0x55, 0x4f, 0x49, 0x4f, - 0x4f, 0x4e, 0x4b, 0x49, 0x4a, 0x4a, 0x49, 0x47, 0x44, 0x4b, 0x47, 0x50, - 0x46, 0x4c, 0x46, 0x4c, 0x4b, 0x4e, 0x49, 0x57, 0x4d, 0x3e, 0x46, 0x47, - 0x50, 0x45, 0x4f, 0x52, 0x3e, 0x4d, 0x49, 0x4a, 0x40, 0x49, 0x4f, 0x5c, - 0x3e, 0x4a, 0x47, 0x45, 0x47, 0x41, 0x44, 0x3f, 0x4b, 0x4a, 0x52, 0x43, - 0x41, 0x43, 0x43, 0x47, 0x55, 0x49, 0x42, 0x4c, 0x58, 0x4b, 0x42, 0x48, - 0x4b, 0x5a, 0x36, 0x33, 0x53, 0x57, 0x4d, 0x4a, 0x37, 0x4c, 0x3e, 0x48, - 0x43, 0x46, 0x39, 0x3c, 0x34, 0x65, 0x47, 0x3d, 0x47, 0x42, 0x3c, 0x3e, - 0x45, 0x5b, 0x44, 0x3e, 0x45, 0x43, 0x46, 0x43, 0x59, 0x4e, 0x48, 0x46, - 0x43, 0x3f, 0x46, 0x47, 0x4e, 0x53, 0x50, 0x4b, 0x4a, 0x3f, 0x4a, 0x54, - 0x4c, 0x4a, 0x43, 0x50, 0x4c, 0x42, 0x4d, 0x55, 0x4d, 0x51, 0x51, 0x46, - 0x49, 0x41, 0x50, 0x44, 0x4a, 0x4b, 0x4b, 0x43, 0x4b, 0x4e, 0x47, 0x4b, - 0x3e, 0x4e, 0x44, 0x4d, 0x49, 0x41, 0x49, 0x44, 0x50, 0x4d, 0x45, 0x4e, - 0x4b, 0x50, 0x45, 0x4c, 0x46, 0x4a, 0x46, 0x42, 0x50, 0x45, 0x48, 0x53, - 0x4d, 0x44, 0x42, 0x50, 0x4c, 0x49, 0x45, 0x55, 0x4d, 0x42, 0x43, 0x41, - 0x4c, 0x41, 0x4e, 0x4d, 0x42, 0x4e, 0x3f, 0x44, 0x4d, 0x4c, 0x4b, 0x4a, - 0x47, 0x47, 0x4e, 0x54, 0x43, 0x40, 0x41, 0x55, 0x49, 0x49, 0x4e, 0x49, - 0x52, 0x4e, 0x46, 0x58, 0x4b, 0x3d, 0x4a, 0x44, 0x4e, 0x47, 0x53, 0x58, - 0x47, 0x42, 0x52, 0x46, 0x49, 0x4b, 0x47, 0x5a, 0x4c, 
0x46, 0x46, 0x49, - 0x4b, 0x4d, 0x3d, 0x48, 0x40, 0x54, 0x48, 0x4c, 0x4c, 0x44, 0x4c, 0x46, - 0x47, 0x4b, 0x4d, 0x44, 0x5a, 0x4a, 0x3e, 0x46, 0x48, 0x53, 0x39, 0x30, - 0x51, 0x60, 0x4d, 0x47, 0x35, 0x4f, 0x45, 0x45, 0x4a, 0x4b, 0x42, 0x3f, - 0x38, 0x6c, 0x3d, 0x40, 0x44, 0x48, 0x3a, 0x3b, 0x46, 0x5e, 0x45, 0x3b, - 0x47, 0x47, 0x45, 0x42, 0x53, 0x55, 0x44, 0x45, 0x46, 0x43, 0x48, 0x48, - 0x52, 0x5d, 0x3e, 0x41, 0x53, 0x42, 0x48, 0x55, 0x49, 0x4d, 0x4a, 0x46, - 0x52, 0x46, 0x51, 0x48, 0x44, 0x46, 0x48, 0x41, 0x49, 0x49, 0x49, 0x49, - 0x41, 0x4d, 0x40, 0x4f, 0x45, 0x46, 0x45, 0x3f, 0x53, 0x40, 0x46, 0x43, - 0x47, 0x4d, 0x50, 0x4c, 0x55, 0x48, 0x45, 0x47, 0x4f, 0x46, 0x42, 0x4d, - 0x41, 0x48, 0x46, 0x4e, 0x42, 0x48, 0x48, 0x45, 0x41, 0x45, 0x48, 0x4a, - 0x40, 0x49, 0x43, 0x4b, 0x48, 0x4a, 0x4c, 0x45, 0x4b, 0x48, 0x48, 0x4f, - 0x40, 0x4b, 0x4a, 0x44, 0x50, 0x4a, 0x43, 0x50, 0x4c, 0x44, 0x46, 0x4c, - 0x42, 0x44, 0x4e, 0x55, 0x47, 0x49, 0x48, 0x47, 0x52, 0x4e, 0x44, 0x59, - 0x4e, 0x44, 0x4a, 0x48, 0x49, 0x4a, 0x42, 0x4e, 0x3e, 0x39, 0x51, 0x45, - 0x4d, 0x49, 0x4f, 0x54, 0x51, 0x4b, 0x50, 0x44, 0x53, 0x4f, 0x4d, 0x48, - 0x42, 0x45, 0x4e, 0x40, 0x4a, 0x48, 0x43, 0x48, 0x52, 0x54, 0x4d, 0x49, - 0x5f, 0x53, 0x46, 0x4e, 0x3f, 0x5a, 0x36, 0x31, 0x52, 0x60, 0x4b, 0x4a, - 0x32, 0x51, 0x40, 0x44, 0x46, 0x52, 0x44, 0x41, 0x3a, 0x6e, 0x41, 0x3e, - 0x47, 0x3e, 0x3a, 0x2a, 0x44, 0x5a, 0x40, 0x3c, 0x4d, 0x48, 0x46, 0x3b, - 0x5e, 0x58, 0x4d, 0x47, 0x51, 0x3a, 0x4b, 0x48, 0x5b, 0x5a, 0x54, 0x43, - 0x50, 0x4c, 0x54, 0x54, 0x49, 0x47, 0x4f, 0x48, 0x50, 0x40, 0x4f, 0x4a, - 0x42, 0x42, 0x3c, 0x41, 0x43, 0x4e, 0x53, 0x49, 0x4b, 0x4d, 0x49, 0x41, - 0x4c, 0x3e, 0x40, 0x49, 0x40, 0x44, 0x49, 0x4f, 0x50, 0x4a, 0x42, 0x3a, - 0x49, 0x4b, 0x47, 0x50, 0x49, 0x41, 0x52, 0x46, 0x3d, 0x44, 0x46, 0x43, - 0x4b, 0x4b, 0x4d, 0x4b, 0x4e, 0x40, 0x45, 0x43, 0x48, 0x44, 0x55, 0x51, - 0x4a, 0x46, 0x4e, 0x40, 0x53, 0x4a, 0x45, 0x41, 0x48, 0x48, 0x45, 0x4e, - 0x4a, 0x48, 0x40, 0x4c, 0x54, 0x44, 0x42, 0x4d, 0x49, 
0x43, 0x45, 0x4c, - 0x43, 0x4f, 0x46, 0x3f, 0x46, 0x4f, 0x4b, 0x59, 0x46, 0x49, 0x54, 0x47, - 0x49, 0x46, 0x45, 0x53, 0x4a, 0x49, 0x54, 0x45, 0x41, 0x45, 0x4c, 0x5e, - 0x50, 0x3d, 0x4d, 0x49, 0x55, 0x4b, 0x49, 0x47, 0x4c, 0x4f, 0x43, 0x3d, - 0x41, 0x4b, 0x43, 0x46, 0x4f, 0x4a, 0x4c, 0x54, 0x5e, 0x4e, 0x40, 0x4d, - 0x3d, 0x59, 0x40, 0x28, 0x54, 0x5f, 0x4d, 0x4b, 0x36, 0x51, 0x3a, 0x47, - 0x4a, 0x55, 0x42, 0x43, 0x3b, 0x72, 0x3b, 0x3d, 0x51, 0x42, 0x3f, 0x2d, - 0x4b, 0x5a, 0x48, 0x44, 0x49, 0x49, 0x3d, 0x39, 0x56, 0x55, 0x46, 0x46, - 0x4b, 0x43, 0x40, 0x4a, 0x52, 0x56, 0x4d, 0x45, 0x4b, 0x48, 0x40, 0x5a, - 0x4e, 0x3a, 0x53, 0x48, 0x4c, 0x44, 0x49, 0x4e, 0x42, 0x47, 0x46, 0x40, - 0x51, 0x42, 0x50, 0x4b, 0x43, 0x53, 0x44, 0x44, 0x46, 0x4c, 0x4c, 0x3c, - 0x42, 0x45, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x3d, 0x47, 0x4b, 0x4c, 0x4e, - 0x52, 0x4a, 0x4e, 0x41, 0x3f, 0x46, 0x43, 0x54, 0x44, 0x53, 0x4e, 0x48, - 0x40, 0x41, 0x4f, 0x45, 0x43, 0x3c, 0x52, 0x49, 0x40, 0x44, 0x4a, 0x3f, - 0x4d, 0x4c, 0x4f, 0x47, 0x44, 0x47, 0x55, 0x47, 0x50, 0x4d, 0x4a, 0x4c, - 0x50, 0x48, 0x47, 0x55, 0x4b, 0x4a, 0x52, 0x49, 0x3d, 0x3f, 0x4f, 0x51, - 0x48, 0x4e, 0x42, 0x4e, 0x42, 0x48, 0x4e, 0x49, 0x4a, 0x50, 0x45, 0x54, - 0x41, 0x43, 0x45, 0x4d, 0x48, 0x48, 0x48, 0x51, 0x53, 0x3e, 0x55, 0x44, - 0x52, 0x56, 0x44, 0x4d, 0x4e, 0x48, 0x4b, 0x43, 0x48, 0x53, 0x48, 0x44, - 0x49, 0x45, 0x4e, 0x50, 0x5d, 0x4a, 0x45, 0x4c, 0x45, 0x55, 0x43, 0x2e, - 0x59, 0x60, 0x4e, 0x4d, 0x32, 0x53, 0x3e, 0x3f, 0x40, 0x63, 0x41, 0x48, - 0x38, 0x73, 0x38, 0x46, 0x50, 0x3e, 0x3c, 0x23, 0x48, 0x61, 0x45, 0x3c, - 0x41, 0x41, 0x36, 0x3b, 0x58, 0x56, 0x4a, 0x40, 0x4f, 0x44, 0x45, 0x4c, - 0x5a, 0x56, 0x47, 0x3f, 0x4d, 0x4b, 0x46, 0x5d, 0x52, 0x47, 0x45, 0x4c, - 0x4a, 0x52, 0x4f, 0x4f, 0x4f, 0x43, 0x4f, 0x47, 0x43, 0x46, 0x3c, 0x4c, - 0x46, 0x55, 0x40, 0x53, 0x43, 0x3e, 0x42, 0x35, 0x51, 0x41, 0x42, 0x3f, - 0x45, 0x3d, 0x41, 0x31, 0x4e, 0x47, 0x48, 0x42, 0x41, 0x45, 0x43, 0x38, - 0x42, 0x40, 0x4a, 0x47, 0x4e, 0x43, 0x40, 0x43, 0x48, 
0x49, 0x45, 0x4f, - 0x44, 0x42, 0x4d, 0x42, 0x42, 0x3f, 0x46, 0x52, 0x3c, 0x3c, 0x47, 0x43, - 0x46, 0x47, 0x45, 0x40, 0x4c, 0x44, 0x43, 0x4a, 0x4b, 0x4d, 0x4e, 0x46, - 0x51, 0x45, 0x47, 0x4b, 0x45, 0x50, 0x40, 0x42, 0x4c, 0x4c, 0x4c, 0x4f, - 0x44, 0x3c, 0x49, 0x3c, 0x3f, 0x45, 0x3f, 0x5c, 0x42, 0x3e, 0x4b, 0x4e, - 0x50, 0x45, 0x42, 0x5c, 0x4c, 0x48, 0x50, 0x52, 0x50, 0x47, 0x4b, 0x44, - 0x3d, 0x50, 0x55, 0x4c, 0x48, 0x3f, 0x4b, 0x44, 0x4a, 0x51, 0x42, 0x4c, - 0x60, 0x51, 0x41, 0x4b, 0x46, 0x5c, 0x42, 0x2c, 0x55, 0x61, 0x50, 0x52, - 0x37, 0x5a, 0x3f, 0x43, 0x43, 0x58, 0x3a, 0x4d, 0x3e, 0x72, 0x35, 0x3f, - 0x58, 0x41, 0x40, 0x1f, 0x55, 0x63, 0x3f, 0x49, 0x41, 0x3e, 0x35, 0x41, - 0x65, 0x54, 0x42, 0x45, 0x45, 0x3c, 0x44, 0x45, 0x59, 0x5a, 0x4d, 0x41, - 0x51, 0x46, 0x49, 0x59, 0x4c, 0x41, 0x42, 0x44, 0x4a, 0x45, 0x3f, 0x4a, - 0x4a, 0x44, 0x48, 0x48, 0x52, 0x40, 0x4a, 0x4a, 0x4d, 0x54, 0x44, 0x48, - 0x54, 0x46, 0x49, 0x3b, 0x42, 0x4a, 0x4e, 0x46, 0x4a, 0x45, 0x4f, 0x30, - 0x46, 0x41, 0x47, 0x46, 0x4b, 0x47, 0x46, 0x38, 0x4c, 0x3a, 0x4b, 0x46, - 0x52, 0x48, 0x4f, 0x3e, 0x48, 0x4a, 0x48, 0x4b, 0x44, 0x45, 0x4a, 0x46, - 0x3f, 0x4f, 0x40, 0x44, 0x43, 0x43, 0x4b, 0x39, 0x46, 0x43, 0x49, 0x49, - 0x49, 0x4a, 0x44, 0x48, 0x4c, 0x41, 0x4d, 0x52, 0x4c, 0x4a, 0x46, 0x3d, - 0x41, 0x4b, 0x41, 0x48, 0x45, 0x3b, 0x51, 0x54, 0x4a, 0x39, 0x4d, 0x41, - 0x54, 0x46, 0x4c, 0x53, 0x48, 0x3e, 0x4a, 0x3d, 0x41, 0x52, 0x54, 0x63, - 0x44, 0x4d, 0x4a, 0x43, 0x52, 0x4b, 0x52, 0x52, 0x4e, 0x41, 0x48, 0x42, - 0x48, 0x4d, 0x49, 0x45, 0x51, 0x48, 0x3e, 0x47, 0x5a, 0x52, 0x4a, 0x4e, - 0x3e, 0x59, 0x3c, 0x2e, 0x5c, 0x5b, 0x4c, 0x56, 0x30, 0x59, 0x3a, 0x48, - 0x3d, 0x5c, 0x44, 0x49, 0x40, 0x7c, 0x3a, 0x48, 0x54, 0x40, 0x41, 0x28, - 0x4d, 0x64, 0x46, 0x47, 0x49, 0x40, 0x30, 0x3a, 0x5f, 0x5b, 0x42, 0x37, - 0x49, 0x45, 0x40, 0x43, 0x5b, 0x54, 0x48, 0x4d, 0x4a, 0x47, 0x51, 0x58, - 0x4b, 0x3c, 0x4d, 0x46, 0x4b, 0x52, 0x4c, 0x58, 0x53, 0x46, 0x42, 0x45, - 0x4c, 0x4a, 0x4d, 0x4e, 0x52, 0x4d, 0x46, 0x44, 0x46, 
0x3f, 0x46, 0x34, - 0x4f, 0x42, 0x44, 0x46, 0x44, 0x50, 0x47, 0x30, 0x44, 0x3c, 0x42, 0x46, - 0x4f, 0x4a, 0x52, 0x30, 0x55, 0x4f, 0x45, 0x4a, 0x48, 0x4c, 0x4e, 0x35, - 0x4e, 0x3c, 0x45, 0x4a, 0x45, 0x4a, 0x44, 0x3c, 0x4e, 0x4a, 0x51, 0x44, - 0x49, 0x40, 0x4a, 0x40, 0x41, 0x44, 0x4f, 0x4c, 0x43, 0x45, 0x4b, 0x43, - 0x3e, 0x3e, 0x4c, 0x44, 0x48, 0x48, 0x42, 0x42, 0x4d, 0x43, 0x50, 0x4d, - 0x49, 0x3c, 0x45, 0x4f, 0x4c, 0x46, 0x4b, 0x48, 0x4d, 0x4d, 0x49, 0x55, - 0x49, 0x3b, 0x40, 0x44, 0x4a, 0x4b, 0x4e, 0x5e, 0x43, 0x47, 0x45, 0x43, - 0x4d, 0x4d, 0x49, 0x46, 0x4a, 0x44, 0x4e, 0x3e, 0x52, 0x41, 0x47, 0x47, - 0x4a, 0x50, 0x48, 0x43, 0x5d, 0x4f, 0x49, 0x48, 0x43, 0x4f, 0x45, 0x3e, - 0x5a, 0x69, 0x4d, 0x5a, 0x3a, 0x5d, 0x3a, 0x48, 0x42, 0x55, 0x3e, 0x48, - 0x48, 0x7b, 0x37, 0x40, 0x57, 0x45, 0x48, 0x24, 0x50, 0x61, 0x4c, 0x4a, - 0x44, 0x41, 0x34, 0x38, 0x65, 0x5b, 0x4f, 0x3c, 0x4d, 0x3a, 0x4a, 0x4c, - 0x66, 0x55, 0x50, 0x47, 0x4d, 0x46, 0x47, 0x58, 0x4c, 0x48, 0x48, 0x48, - 0x4e, 0x59, 0x4f, 0x4b, 0x45, 0x45, 0x4b, 0x54, 0x46, 0x51, 0x4f, 0x44, - 0x42, 0x55, 0x48, 0x44, 0x48, 0x41, 0x53, 0x2e, 0x4d, 0x45, 0x44, 0x54, - 0x4a, 0x44, 0x53, 0x34, 0x4c, 0x46, 0x47, 0x3f, 0x4c, 0x4b, 0x47, 0x36, - 0x47, 0x41, 0x43, 0x40, 0x51, 0x46, 0x45, 0x33, 0x46, 0x3e, 0x47, 0x50, - 0x3f, 0x48, 0x48, 0x37, 0x41, 0x41, 0x42, 0x3e, 0x45, 0x3d, 0x49, 0x3e, - 0x4f, 0x42, 0x49, 0x4a, 0x46, 0x46, 0x48, 0x44, 0x49, 0x45, 0x46, 0x4a, - 0x4a, 0x47, 0x48, 0x43, 0x44, 0x45, 0x3f, 0x4c, 0x4c, 0x49, 0x4d, 0x51, - 0x4a, 0x4a, 0x49, 0x4c, 0x42, 0x4d, 0x4b, 0x4b, 0x4a, 0x42, 0x47, 0x4d, - 0x3e, 0x4b, 0x47, 0x5c, 0x49, 0x3d, 0x4e, 0x41, 0x44, 0x49, 0x3e, 0x3e, - 0x4b, 0x47, 0x4e, 0x45, 0x44, 0x4a, 0x4d, 0x4a, 0x4f, 0x46, 0x45, 0x52, - 0x60, 0x53, 0x49, 0x50, 0x3d, 0x4f, 0x43, 0x3d, 0x52, 0x64, 0x52, 0x58, - 0x39, 0x5f, 0x36, 0x4c, 0x45, 0x57, 0x42, 0x4b, 0x3f, 0x80, 0x34, 0x47, - 0x58, 0x41, 0x45, 0x1b, 0x4b, 0x5e, 0x4c, 0x40, 0x44, 0x42, 0x39, 0x3a, - 0x5e, 0x5b, 0x4b, 0x3a, 0x4b, 0x3f, 0x45, 0x3e, 0x69, 
0x57, 0x4b, 0x45, - 0x4b, 0x3f, 0x45, 0x55, 0x49, 0x49, 0x48, 0x47, 0x41, 0x4f, 0x42, 0x53, - 0x49, 0x40, 0x42, 0x3e, 0x49, 0x47, 0x53, 0x47, 0x45, 0x51, 0x4a, 0x44, - 0x44, 0x45, 0x4e, 0x2a, 0x45, 0x42, 0x4a, 0x4b, 0x46, 0x4d, 0x41, 0x30, - 0x3d, 0x43, 0x3f, 0x48, 0x49, 0x44, 0x4d, 0x2e, 0x48, 0x4a, 0x4c, 0x51, - 0x50, 0x46, 0x3e, 0x2c, 0x4d, 0x3f, 0x47, 0x46, 0x3c, 0x40, 0x4c, 0x38, - 0x4f, 0x46, 0x47, 0x53, 0x3b, 0x3c, 0x4e, 0x3e, 0x49, 0x40, 0x43, 0x4c, - 0x4d, 0x48, 0x45, 0x3c, 0x4d, 0x4c, 0x4d, 0x45, 0x3f, 0x49, 0x4a, 0x43, - 0x4d, 0x41, 0x4b, 0x50, 0x4e, 0x46, 0x50, 0x44, 0x49, 0x44, 0x4e, 0x42, - 0x4a, 0x43, 0x4c, 0x4c, 0x49, 0x49, 0x44, 0x4e, 0x4b, 0x3f, 0x4b, 0x5d, - 0x41, 0x49, 0x4b, 0x46, 0x4e, 0x48, 0x45, 0x51, 0x4d, 0x45, 0x46, 0x45, - 0x4b, 0x4e, 0x3c, 0x4d, 0x3d, 0x41, 0x47, 0x47, 0x64, 0x54, 0x41, 0x55, - 0x47, 0x56, 0x44, 0x3b, 0x53, 0x66, 0x4f, 0x5e, 0x40, 0x5d, 0x38, 0x4a, - 0x41, 0x59, 0x42, 0x48, 0x47, 0xff, 0x36, 0x49, 0x59, 0x41, 0x43, 0x1d, - 0x4d, 0x5e, 0x44, 0x44, 0x50, 0x3f, 0x39, 0x40, 0x68, 0x5e, 0x4a, 0x41, - 0x52, 0x41, 0x43, 0x41, 0x68, 0x51, 0x45, 0x48, 0x4c, 0x46, 0x4a, 0x5e, - 0x4e, 0x40, 0x4d, 0x41, 0x41, 0x5c, 0x3f, 0x4e, 0x4c, 0x37, 0x48, 0x40, - 0x46, 0x47, 0x4f, 0x43, 0x53, 0x52, 0x3d, 0x44, 0x47, 0x44, 0x3d, 0x34, - 0x44, 0x42, 0x4a, 0x43, 0x4d, 0x3f, 0x53, 0x2e, 0x42, 0x47, 0x43, 0x4d, - 0x45, 0x45, 0x47, 0x31, 0x4d, 0x39, 0x41, 0x4a, 0x4a, 0x4d, 0x4b, 0x35, - 0x47, 0x4e, 0x4c, 0x40, 0x4a, 0x44, 0x44, 0x36, 0x3e, 0x49, 0x3f, 0x45, - 0x46, 0x43, 0x4e, 0x3c, 0x4d, 0x47, 0x4c, 0x48, 0x4a, 0x4b, 0x48, 0x39, - 0x46, 0x50, 0x4a, 0x4f, 0x46, 0x41, 0x44, 0x4a, 0x41, 0x4f, 0x4c, 0x4e, - 0x55, 0x46, 0x43, 0x46, 0x4a, 0x48, 0x4e, 0x46, 0x42, 0x40, 0x4f, 0x56, - 0x4c, 0x45, 0x4b, 0x46, 0x4a, 0x47, 0x42, 0x5e, 0x49, 0x4e, 0x46, 0x43, - 0x4e, 0x42, 0x45, 0x48, 0x47, 0x48, 0x4f, 0x45, 0x47, 0x51, 0x4b, 0x4c, - 0x51, 0x39, 0x4d, 0x48, 0x60, 0x57, 0x49, 0x52, 0x3d, 0x57, 0x46, 0x3d, - 0x53, 0x68, 0x4b, 0x60, 0x40, 0x5a, 0x41, 0x4b, 0x46, 
0x56, 0x46, 0x4c, - 0x49, 0x7e, 0x2f, 0x48, 0x51, 0x42, 0x40, 0x20, 0x4b, 0x62, 0x4d, 0x41, - 0x4f, 0x43, 0x3d, 0x35, 0x63, 0x63, 0x46, 0x3e, 0x4e, 0x47, 0x40, 0x40, - 0x60, 0x52, 0x4c, 0x46, 0x49, 0x48, 0x4f, 0x56, 0x51, 0x47, 0x52, 0x4e, - 0x4b, 0x59, 0x55, 0x4f, 0x48, 0x3d, 0x48, 0x4a, 0x4d, 0x50, 0x47, 0x47, - 0x51, 0x52, 0x4d, 0x51, 0x45, 0x45, 0x47, 0x2d, 0x4d, 0x41, 0x43, 0x49, - 0x4d, 0x40, 0x4a, 0x2f, 0x4f, 0x43, 0x46, 0x4a, 0x3e, 0x4a, 0x4a, 0x2b, - 0x49, 0x4c, 0x4c, 0x3e, 0x41, 0x4c, 0x4a, 0x2b, 0x40, 0x44, 0x46, 0x4a, - 0x40, 0x44, 0x42, 0x38, 0x52, 0x42, 0x46, 0x51, 0x53, 0x4e, 0x45, 0x31, - 0x45, 0x47, 0x4f, 0x46, 0x49, 0x43, 0x45, 0x3b, 0x4b, 0x4b, 0x4b, 0x4c, - 0x43, 0x4a, 0x4c, 0x43, 0x4e, 0x40, 0x52, 0x44, 0x48, 0x49, 0x47, 0x4b, - 0x4e, 0x3d, 0x4e, 0x44, 0x48, 0x4d, 0x4f, 0x4f, 0x50, 0x36, 0x47, 0x41, - 0x4a, 0x44, 0x45, 0x56, 0x4f, 0x4c, 0x50, 0x4b, 0x45, 0x3e, 0x45, 0x4e, - 0x45, 0x45, 0x43, 0x40, 0x47, 0x4e, 0x45, 0x3e, 0x4a, 0x3f, 0x49, 0x50, - 0x62, 0x55, 0x48, 0x56, 0x3e, 0x57, 0x4f, 0x3b, 0x55, 0x6c, 0x50, 0x5c, - 0x3d, 0x54, 0x3d, 0x46, 0x43, 0x59, 0x3e, 0x51, 0x4d, 0x7b, 0x33, 0x47, - 0x52, 0x43, 0x3f, 0x25, 0x4a, 0x6f, 0x49, 0x3e, 0x50, 0x40, 0x41, 0x30, - 0x5e, 0x5c, 0x4a, 0x43, 0x4d, 0x42, 0x46, 0x3b, 0x63, 0x53, 0x4f, 0x43, - 0x58, 0x48, 0x4b, 0x59, 0x50, 0x4e, 0x4b, 0x51, 0x4a, 0x55, 0x44, 0x46, - 0x4c, 0x3d, 0x4c, 0x52, 0x44, 0x52, 0x4c, 0x41, 0x4f, 0x44, 0x4a, 0x47, - 0x4e, 0x48, 0x49, 0x2e, 0x3e, 0x45, 0x4c, 0x48, 0x41, 0x47, 0x4d, 0x2e, - 0x40, 0x4b, 0x4c, 0x42, 0x4d, 0x40, 0x4e, 0x2e, 0x43, 0x45, 0x4b, 0x43, - 0x3e, 0x49, 0x55, 0x35, 0x43, 0x42, 0x42, 0x40, 0x4e, 0x46, 0x44, 0x37, - 0x49, 0x41, 0x3f, 0x52, 0x47, 0x4b, 0x43, 0x33, 0x4b, 0x47, 0x4b, 0x4c, - 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x40, 0x49, 0x41, 0x42, 0x49, 0x4b, 0x46, - 0x4e, 0x4e, 0x47, 0x4e, 0x48, 0x48, 0x4b, 0x46, 0x51, 0x4b, 0x46, 0x4d, - 0x47, 0x4f, 0x3e, 0x51, 0x46, 0x4e, 0x46, 0x4b, 0x47, 0x48, 0x4e, 0x55, - 0x4c, 0x3d, 0x47, 0x51, 0x42, 0x45, 0x4f, 0x42, 0x52, 
0x50, 0x44, 0x4c, - 0x44, 0x44, 0x43, 0x4d, 0x40, 0x42, 0x4d, 0x4b, 0x5d, 0x4e, 0x47, 0x54, - 0x47, 0x51, 0x43, 0x39, 0x58, 0x66, 0x4e, 0x5a, 0x41, 0x52, 0x36, 0x47, - 0x45, 0x5f, 0x34, 0x50, 0x46, 0x79, 0x30, 0x48, 0x50, 0x45, 0x32, 0x22, - 0x54, 0x64, 0x49, 0x46, 0x45, 0x3c, 0x42, 0x36, 0x65, 0x5c, 0x48, 0x3a, - 0x4d, 0x4b, 0x47, 0x3e, 0x63, 0x56, 0x4a, 0x48, 0x51, 0x42, 0x4f, 0x5e, - 0x4c, 0x44, 0x4b, 0x4c, 0x3d, 0x5a, 0x43, 0x4d, 0x42, 0x40, 0x4f, 0x4d, - 0x3f, 0x3e, 0x46, 0x40, 0x49, 0x42, 0x49, 0x40, 0x49, 0x4c, 0x4a, 0x2e, - 0x4b, 0x3f, 0x53, 0x4b, 0x48, 0x49, 0x3e, 0x34, 0x47, 0x4a, 0x4b, 0x46, - 0x3b, 0x49, 0x46, 0x34, 0x4b, 0x48, 0x4c, 0x49, 0x49, 0x43, 0x4f, 0x2e, - 0x44, 0x46, 0x48, 0x50, 0x46, 0x4e, 0x4a, 0x37, 0x4b, 0x4c, 0x4a, 0x50, - 0x45, 0x4a, 0x48, 0x3b, 0x48, 0x44, 0x48, 0x4a, 0x41, 0x44, 0x52, 0x3f, - 0x4c, 0x46, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x36, 0x53, 0x3e, 0x48, 0x47, - 0x3f, 0x42, 0x41, 0x4c, 0x42, 0x4a, 0x52, 0x46, 0x49, 0x3f, 0x48, 0x5a, - 0x43, 0x42, 0x3d, 0x43, 0x4f, 0x44, 0x43, 0x65, 0x41, 0x41, 0x44, 0x4b, - 0x50, 0x44, 0x53, 0x49, 0x41, 0x45, 0x4a, 0x4d, 0x40, 0x45, 0x4a, 0x4e, - 0x50, 0x40, 0x51, 0x40, 0x5e, 0x50, 0x43, 0x5c, 0x47, 0x5a, 0x44, 0x4c, - 0x54, 0x64, 0x4f, 0x63, 0x39, 0x58, 0x3c, 0x4a, 0x42, 0x5e, 0x3c, 0x4a, - 0x48, 0x7b, 0x34, 0x4c, 0x4f, 0x44, 0x30, 0x24, 0x50, 0x65, 0x47, 0x39, - 0x46, 0x3e, 0x3f, 0x33, 0x65, 0x5a, 0x44, 0x38, 0x50, 0x47, 0x4b, 0x3e, - 0x5b, 0x53, 0x4a, 0x4d, 0x51, 0x40, 0x47, 0x59, 0x51, 0x42, 0x4f, 0x50, - 0x45, 0x57, 0x46, 0x50, 0x3f, 0x3c, 0x4c, 0x4f, 0x46, 0x41, 0x4a, 0x3e, - 0x4d, 0x45, 0x51, 0x48, 0x4e, 0x44, 0x4e, 0x35, 0x44, 0x3f, 0x44, 0x48, - 0x3c, 0x4c, 0x49, 0x2c, 0x4a, 0x46, 0x48, 0x44, 0x4b, 0x42, 0x4b, 0x2f, - 0x4e, 0x50, 0x4c, 0x4d, 0x44, 0x46, 0x3f, 0x39, 0x4d, 0x47, 0x45, 0x41, - 0x42, 0x47, 0x4a, 0x3a, 0x40, 0x3e, 0x4a, 0x51, 0x3f, 0x47, 0x44, 0x37, - 0x47, 0x4e, 0x47, 0x52, 0x45, 0x42, 0x4a, 0x3d, 0x43, 0x4d, 0x4d, 0x47, - 0x48, 0x43, 0x44, 0x44, 0x47, 0x4e, 0x52, 0x4b, 0x4e, 
0x50, 0x42, 0x47, - 0x4b, 0x4b, 0x4e, 0x4c, 0x4e, 0x47, 0x50, 0x56, 0x46, 0x47, 0x4d, 0x49, - 0x4d, 0x46, 0x49, 0x5f, 0x49, 0x42, 0x4d, 0x44, 0x40, 0x4b, 0x52, 0x45, - 0x46, 0x4a, 0x4b, 0x49, 0x47, 0x4b, 0x42, 0x45, 0x42, 0x44, 0x46, 0x4c, - 0x62, 0x4a, 0x44, 0x53, 0x43, 0x5a, 0x48, 0x49, 0x59, 0x68, 0x46, 0x61, - 0x40, 0x5a, 0x3a, 0x4d, 0x45, 0x5e, 0x33, 0x4f, 0x4e, 0x74, 0x3e, 0x3e, - 0x5a, 0x4b, 0x34, 0x31, 0x52, 0x6c, 0x44, 0x39, 0x4c, 0x3b, 0x39, 0x3a, - 0x63, 0x65, 0x4b, 0x40, 0x50, 0x4d, 0x53, 0x4a, 0x69, 0x56, 0x54, 0x45, - 0x4c, 0x4c, 0x50, 0x5b, 0x4d, 0x4f, 0x3d, 0x4b, 0x44, 0x47, 0x43, 0x47, - 0x49, 0x3c, 0x49, 0x41, 0x41, 0x3f, 0x47, 0x43, 0x48, 0x47, 0x4c, 0x43, - 0x4a, 0x40, 0x4d, 0x32, 0x4b, 0x4d, 0x44, 0x48, 0x46, 0x44, 0x50, 0x2f, - 0x4e, 0x49, 0x53, 0x4b, 0x52, 0x47, 0x4b, 0x2b, 0x48, 0x4b, 0x4a, 0x4c, - 0x4d, 0x4c, 0x43, 0x37, 0x48, 0x3c, 0x4b, 0x42, 0x51, 0x3f, 0x45, 0x3c, - 0x49, 0x40, 0x42, 0x43, 0x4d, 0x4c, 0x3f, 0x3f, 0x4d, 0x43, 0x45, 0x42, - 0x48, 0x42, 0x48, 0x39, 0x51, 0x4e, 0x46, 0x4f, 0x3e, 0x4c, 0x45, 0x3e, - 0x3f, 0x3f, 0x43, 0x41, 0x4b, 0x4b, 0x43, 0x4d, 0x44, 0x3b, 0x48, 0x45, - 0x3c, 0x4a, 0x48, 0x5b, 0x3c, 0x4b, 0x4c, 0x44, 0x46, 0x3e, 0x45, 0x57, - 0x43, 0x42, 0x51, 0x4a, 0x46, 0x47, 0x43, 0x49, 0x42, 0x43, 0x50, 0x4e, - 0x4e, 0x44, 0x41, 0x4e, 0x4e, 0x41, 0x48, 0x47, 0x5c, 0x53, 0x44, 0x54, - 0x44, 0x5b, 0x45, 0x46, 0x55, 0x67, 0x4d, 0x5d, 0x40, 0x5a, 0x43, 0x4b, - 0x43, 0x60, 0x3c, 0x4b, 0x41, 0x79, 0x41, 0x41, 0x58, 0x48, 0x40, 0x3b, - 0x4f, 0x6c, 0x46, 0x3f, 0x53, 0x3a, 0x3d, 0x36, 0x5a, 0x57, 0x44, 0x41, - 0x4c, 0x47, 0x4e, 0x48, 0x62, 0x60, 0x4a, 0x46, 0x51, 0x3e, 0x52, 0x5f, - 0x4b, 0x46, 0x48, 0x4c, 0x4c, 0x55, 0x43, 0x46, 0x49, 0x3e, 0x41, 0x40, - 0x4d, 0x47, 0x46, 0x3b, 0x51, 0x3a, 0x4a, 0x45, 0x50, 0x47, 0x51, 0x38, - 0x44, 0x41, 0x40, 0x4b, 0x4d, 0x44, 0x4d, 0x28, 0x47, 0x3e, 0x44, 0x40, - 0x49, 0x49, 0x40, 0x3c, 0x44, 0x4c, 0x48, 0x51, 0x46, 0x3e, 0x47, 0x2a, - 0x41, 0x44, 0x49, 0x4c, 0x4e, 0x4e, 0x42, 0x3c, 0x49, 
0x42, 0x43, 0x45, - 0x4e, 0x4d, 0x50, 0x39, 0x42, 0x43, 0x48, 0x41, 0x3f, 0x40, 0x4e, 0x3a, - 0x44, 0x3d, 0x49, 0x4d, 0x47, 0x45, 0x4b, 0x42, 0x4c, 0x4d, 0x3f, 0x3f, - 0x4e, 0x4d, 0x4d, 0x4d, 0x4d, 0x45, 0x47, 0x43, 0x4c, 0x46, 0x47, 0x57, - 0x4b, 0x42, 0x4d, 0x46, 0x4b, 0x4b, 0x43, 0x58, 0x48, 0x49, 0x4d, 0x47, - 0x43, 0x49, 0x4b, 0x48, 0x46, 0x4f, 0x4f, 0x42, 0x4a, 0x43, 0x49, 0x4e, - 0x4a, 0x47, 0x4c, 0x48, 0x5a, 0x57, 0x4a, 0x58, 0x49, 0x4f, 0x45, 0x47, - 0x63, 0x66, 0x4d, 0x5e, 0x4b, 0x51, 0x45, 0x4a, 0x43, 0x5d, 0x33, 0x4b, - 0x4e, 0x70, 0x42, 0x39, 0x57, 0x4a, 0x40, 0x3a, 0x51, 0x68, 0x45, 0x45, - 0x4c, 0x44, 0x3a, 0x3a, 0x4f, 0x62, 0x49, 0x45, 0x53, 0x4c, 0x4e, 0x41, - 0x63, 0x5e, 0x44, 0x44, 0x47, 0x43, 0x47, 0x59, 0x4c, 0x4b, 0x4c, 0x49, - 0x3e, 0x43, 0x4c, 0x46, 0x4c, 0x38, 0x47, 0x46, 0x46, 0x47, 0x40, 0x44, - 0x51, 0x3e, 0x40, 0x47, 0x3f, 0x45, 0x48, 0x2a, 0x42, 0x3e, 0x43, 0x46, - 0x50, 0x4c, 0x4a, 0x2c, 0x49, 0x4b, 0x48, 0x48, 0x40, 0x4a, 0x4a, 0x37, - 0x4e, 0x42, 0x4f, 0x4c, 0x41, 0x43, 0x45, 0x38, 0x4e, 0x3d, 0x41, 0x47, - 0x42, 0x42, 0x43, 0x3b, 0x4a, 0x40, 0x48, 0x4a, 0x53, 0x44, 0x4d, 0x35, - 0x51, 0x3c, 0x4e, 0x4e, 0x3e, 0x3f, 0x4b, 0x3c, 0x3e, 0x47, 0x41, 0x48, - 0x40, 0x46, 0x4e, 0x44, 0x49, 0x42, 0x49, 0x44, 0x4b, 0x46, 0x46, 0x43, - 0x4c, 0x4b, 0x49, 0x4d, 0x3d, 0x47, 0x43, 0x5c, 0x4a, 0x42, 0x47, 0x4e, - 0x47, 0x40, 0x4c, 0x55, 0x3f, 0x45, 0x46, 0x49, 0x46, 0x48, 0x49, 0x4d, - 0x4c, 0x41, 0x49, 0x40, 0x4a, 0x44, 0x42, 0x49, 0x52, 0x41, 0x49, 0x4a, - 0x5c, 0x53, 0x47, 0x58, 0x49, 0x55, 0x4a, 0x4a, 0x62, 0x61, 0x4b, 0x57, - 0x3c, 0x50, 0x42, 0x4c, 0x49, 0x5f, 0x3f, 0x4a, 0x42, 0x70, 0x40, 0x40, - 0x4f, 0x46, 0x43, 0x43, 0x4d, 0x6c, 0x41, 0x3e, 0x4e, 0x49, 0x43, 0x38, - 0x50, 0x57, 0x43, 0x39, 0x4a, 0x4f, 0x51, 0x3e, 0x5c, 0x57, 0x46, 0x49, - 0x41, 0x40, 0x42, 0x4f, 0x4c, 0x45, 0x46, 0x4a, 0x4c, 0x4b, 0x43, 0x42, - 0x4c, 0x3c, 0x47, 0x47, 0x4f, 0x44, 0x45, 0x3a, 0x4d, 0x3d, 0x4d, 0x3f, - 0x46, 0x4f, 0x41, 0x37, 0x46, 0x45, 0x54, 0x47, 0x4e, 
0x46, 0x47, 0x23, - 0x48, 0x4e, 0x4a, 0x47, 0x45, 0x45, 0x4e, 0x33, 0x49, 0x4a, 0x4d, 0x4e, - 0x49, 0x46, 0x49, 0x36, 0x48, 0x44, 0x53, 0x44, 0x4a, 0x45, 0x4a, 0x37, - 0x45, 0x36, 0x4b, 0x4e, 0x50, 0x3f, 0x49, 0x38, 0x40, 0x43, 0x46, 0x4c, - 0x43, 0x46, 0x4a, 0x3f, 0x45, 0x3d, 0x44, 0x47, 0x44, 0x42, 0x4a, 0x45, - 0x47, 0x43, 0x4d, 0x4d, 0x44, 0x44, 0x4f, 0x4a, 0x4a, 0x41, 0x50, 0x50, - 0x4b, 0x44, 0x54, 0x5c, 0x4b, 0x3a, 0x46, 0x4a, 0x4a, 0x43, 0x48, 0x5c, - 0x4b, 0x43, 0x47, 0x3d, 0x3e, 0x54, 0x42, 0x47, 0x42, 0x4f, 0x4b, 0x4b, - 0x46, 0x46, 0x46, 0x42, 0x42, 0x4b, 0x48, 0x45, 0x51, 0x4e, 0x49, 0x4d, - 0x43, 0x56, 0x45, 0x40, 0x5a, 0x58, 0x4c, 0x55, 0x40, 0x4b, 0x4c, 0x51, - 0x42, 0x59, 0x43, 0x46, 0x46, 0x69, 0x43, 0x3c, 0x54, 0x47, 0x3d, 0x41, - 0x52, 0x64, 0x44, 0x38, 0x4f, 0x49, 0x3a, 0x3a, 0x55, 0x54, 0x45, 0x3e, - 0x49, 0x44, 0x4e, 0x3f, 0x57, 0x50, 0x47, 0x43, 0x45, 0x48, 0x53, 0x5b, - 0x53, 0x4d, 0x48, 0x4e, 0x48, 0x3a, 0x3e, 0x46, 0x42, 0x36, 0x50, 0x4d, - 0x49, 0x4b, 0x4b, 0x45, 0x4c, 0x44, 0x50, 0x47, 0x3e, 0x49, 0x50, 0x37, - 0x4c, 0x4b, 0x4a, 0x54, 0x4e, 0x43, 0x40, 0x25, 0x46, 0x42, 0x52, 0x3d, - 0x44, 0x45, 0x51, 0x2e, 0x4a, 0x3d, 0x46, 0x46, 0x4c, 0x42, 0x48, 0x34, - 0x44, 0x44, 0x44, 0x4c, 0x4f, 0x4b, 0x42, 0x3d, 0x45, 0x40, 0x47, 0x49, - 0x43, 0x41, 0x3e, 0x39, 0x47, 0x4b, 0x50, 0x4a, 0x46, 0x47, 0x4e, 0x3b, - 0x4e, 0x3e, 0x49, 0x4a, 0x50, 0x40, 0x43, 0x49, 0x48, 0x3c, 0x4f, 0x45, - 0x4a, 0x41, 0x42, 0x48, 0x4b, 0x46, 0x4a, 0x50, 0x40, 0x49, 0x44, 0x54, - 0x45, 0x45, 0x4a, 0x4b, 0x51, 0x51, 0x48, 0x53, 0x50, 0x3f, 0x50, 0x46, - 0x44, 0x45, 0x51, 0x43, 0x4f, 0x3e, 0x41, 0x41, 0x46, 0x45, 0x45, 0x4c, - 0x54, 0x3c, 0x4a, 0x4c, 0x5a, 0x4f, 0x46, 0x4b, 0x47, 0x4a, 0x43, 0x4c, - 0x56, 0x5a, 0x4a, 0x53, 0x4c, 0x49, 0x46, 0x4c, 0x45, 0x59, 0x40, 0x4b, - 0x48, 0x60, 0x3d, 0x42, 0x52, 0x3f, 0x42, 0x3d, 0x52, 0x5f, 0x46, 0x42, - 0x4b, 0x4e, 0x4a, 0x3d, 0x52, 0x55, 0x53, 0x37, 0x47, 0x3e, 0x4a, 0x42, - 0x51, 0x54, 0x48, 0x48, 0x4b, 0x48, 0x3e, 0x52, 0x41, 
0x4e, 0x4c, 0x4f, - 0x43, 0x3b, 0x4b, 0x4b, 0x4c, 0x40, 0x48, 0x49, 0x4d, 0x3a, 0x45, 0x3c, - 0x53, 0x44, 0x48, 0x4d, 0x4b, 0x49, 0x46, 0x3c, 0x4d, 0x40, 0x51, 0x3f, - 0x4c, 0x45, 0x44, 0x2f, 0x49, 0x51, 0x3f, 0x4d, 0x3e, 0x4e, 0x3c, 0x30, - 0x3d, 0x48, 0x4f, 0x3f, 0x45, 0x45, 0x46, 0x3b, 0x4c, 0x46, 0x4d, 0x50, - 0x4c, 0x3d, 0x41, 0x37, 0x3e, 0x3e, 0x4f, 0x4b, 0x4d, 0x4f, 0x45, 0x45, - 0x4a, 0x47, 0x4a, 0x44, 0x43, 0x46, 0x51, 0x41, 0x4e, 0x39, 0x44, 0x4a, - 0x4e, 0x49, 0x4a, 0x42, 0x49, 0x4b, 0x4e, 0x48, 0x49, 0x4a, 0x45, 0x4a, - 0x45, 0x41, 0x4a, 0x4b, 0x42, 0x41, 0x48, 0x4a, 0x44, 0x3a, 0x46, 0x49, - 0x54, 0x45, 0x44, 0x60, 0x4a, 0x4e, 0x45, 0x4a, 0x4a, 0x45, 0x4b, 0x49, - 0x42, 0x44, 0x46, 0x50, 0x4b, 0x4b, 0x4e, 0x45, 0x48, 0x3e, 0x55, 0x42, - 0x51, 0x49, 0x49, 0x44, 0x4e, 0x54, 0x53, 0x49, 0x4c, 0x63, 0x48, 0x5a, - 0x50, 0x4b, 0x45, 0x49, 0x43, 0x57, 0x4c, 0x3f, 0x4d, 0x67, 0x3f, 0x47, - 0x53, 0x49, 0x43, 0x44, 0x49, 0x61, 0x50, 0x47, 0x49, 0x49, 0x4a, 0x42, - 0x4a, 0x51, 0x46, 0x43, 0x3f, 0x34, 0x40, 0x3a, 0x45, 0x54, 0x4c, 0x55, - 0x40, 0x3c, 0x4a, 0x4d, 0x3e, 0x4d, 0x48, 0x51, 0x4c, 0x3e, 0x4c, 0x4f, - 0x50, 0x47, 0x4d, 0x49, 0x4d, 0x4e, 0x45, 0x43, 0x41, 0x41, 0x40, 0x47, - 0x43, 0x4a, 0x4a, 0x3c, 0x4c, 0x3d, 0x4e, 0x43, 0x41, 0x42, 0x4a, 0x30, - 0x45, 0x4c, 0x45, 0x55, 0x46, 0x39, 0x43, 0x39, 0x45, 0x47, 0x48, 0x53, - 0x4a, 0x48, 0x43, 0x38, 0x4f, 0x51, 0x4d, 0x4c, 0x41, 0x46, 0x40, 0x3d, - 0x43, 0x4b, 0x40, 0x46, 0x47, 0x50, 0x4a, 0x43, 0x50, 0x4e, 0x45, 0x4f, - 0x4d, 0x44, 0x4d, 0x3f, 0x4e, 0x48, 0x4a, 0x49, 0x44, 0x3d, 0x4a, 0x44, - 0x40, 0x45, 0x49, 0x40, 0x4a, 0x44, 0x4f, 0x4a, 0x43, 0x4a, 0x4e, 0x52, - 0x4d, 0x50, 0x48, 0x4c, 0x43, 0x45, 0x4d, 0x54, 0x4a, 0x49, 0x4c, 0x58, - 0x4c, 0x48, 0x4c, 0x44, 0x4b, 0x4e, 0x52, 0x44, 0x49, 0x44, 0x47, 0x4e, - 0x4b, 0x45, 0x49, 0x3e, 0x4c, 0x3b, 0x53, 0x3f, 0x51, 0x41, 0x3f, 0x44, - 0x43, 0x4a, 0x4b, 0x43, 0x53, 0x57, 0x50, 0x53, 0x4f, 0x4b, 0x48, 0x51, - 0x47, 0x49, 0x46, 0x4d, 0x4d, 0x5e, 0x44, 0x46, 0x56, 
0x3d, 0x3c, 0x3e, - 0x47, 0x55, 0x54, 0x46, 0x42, 0x49, 0x4f, 0x43, 0x48, 0x54, 0x51, 0x40, - 0x44, 0x44, 0x47, 0x45, 0x4b, 0x59, 0x4d, 0x47, 0x40, 0x39, 0x48, 0x54, - 0x43, 0x45, 0x44, 0x42, 0x4c, 0x3c, 0x4d, 0x42, 0x4b, 0x45, 0x42, 0x48, - 0x51, 0x44, 0x45, 0x3f, 0x3d, 0x49, 0x4b, 0x4a, 0x41, 0x43, 0x4f, 0x3f, - 0x51, 0x4b, 0x44, 0x46, 0x46, 0x44, 0x53, 0x3d, 0x47, 0x47, 0x43, 0x4b, - 0x41, 0x43, 0x3c, 0x3b, 0x49, 0x47, 0x47, 0x49, 0x4b, 0x3d, 0x43, 0x43, - 0x4b, 0x47, 0x45, 0x4e, 0x42, 0x4a, 0x4c, 0x3e, 0x51, 0x3e, 0x46, 0x44, - 0x46, 0x43, 0x42, 0x42, 0x47, 0x4d, 0x51, 0x4b, 0x49, 0x44, 0x4d, 0x40, - 0x50, 0x43, 0x41, 0x4c, 0x42, 0x49, 0x49, 0x4c, 0x42, 0x50, 0x48, 0x3f, - 0x46, 0x42, 0x48, 0x57, 0x49, 0x4d, 0x47, 0x4e, 0x48, 0x4b, 0x46, 0x50, - 0x47, 0x45, 0x52, 0x45, 0x4b, 0x48, 0x40, 0x5b, 0x4e, 0x43, 0x51, 0x48, - 0x48, 0x4a, 0x4a, 0x4a, 0x52, 0x51, 0x4c, 0x4b, 0x42, 0x55, 0x4d, 0x46, - 0x50, 0x40, 0x4a, 0x50, 0x51, 0x3e, 0x42, 0x4c, 0x43, 0x46, 0x4d, 0x46, - 0x46, 0x4d, 0x4d, 0x52, 0x4e, 0x44, 0x45, 0x47, 0x49, 0x4c, 0x41, 0x44, - 0x4d, 0x54, 0x4c, 0x4a, 0x54, 0x3e, 0x44, 0x43, 0x53, 0x55, 0x4b, 0x4a, - 0x47, 0x47, 0x4f, 0x46, 0x4f, 0x4b, 0x51, 0x3f, 0x41, 0x4c, 0x43, 0x46, - 0x55, 0x51, 0x40, 0x4b, 0x4f, 0x40, 0x47, 0x50, 0x4e, 0x4a, 0x46, 0x4e, - 0x42, 0x4d, 0x48, 0x49, 0x48, 0x4a, 0x4a, 0x43, 0x49, 0x48, 0x44, 0x3b, - 0x51, 0x46, 0x3d, 0x43, 0x47, 0x4a, 0x4f, 0x42, 0x4a, 0x50, 0x4f, 0x41, - 0x45, 0x45, 0x43, 0x3c, 0x4c, 0x4c, 0x46, 0x4b, 0x3e, 0x44, 0x4b, 0x3a, - 0x45, 0x50, 0x42, 0x48, 0x46, 0x47, 0x44, 0x3a, 0x53, 0x46, 0x4e, 0x4f, - 0x43, 0x40, 0x46, 0x48, 0x4e, 0x45, 0x3f, 0x47, 0x48, 0x3f, 0x44, 0x4f, - 0x44, 0x47, 0x4e, 0x47, 0x47, 0x49, 0x42, 0x43, 0x3f, 0x49, 0x4a, 0x53, - 0x53, 0x4a, 0x4e, 0x4a, 0x49, 0x4d, 0x49, 0x41, 0x48, 0x4d, 0x4d, 0x4e, - 0x4b, 0x45, 0x4d, 0x4a, 0x46, 0x4a, 0x46, 0x51, 0x4b, 0x47, 0x49, 0x45, - 0x49, 0x49, 0x4b, 0x5c, 0x48, 0x42, 0x51, 0x4c, 0x41, 0x3f, 0x4c, 0x42, - 0x4f, 0x45, 0x4b, 0x4a, 0x52, 0x48, 0x53, 0x4f, 0x40, 
0x47, 0x41, 0x47, - 0x68, 0xfb, 0xff, 0xff, 0x4c, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, + 0xbc, 0xb3, 0xff, 0xff, 0xc0, 0xb3, 0xff, 0xff, 0x1e, 0xb4, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x89, 0xa5, 0xe8, 0xc1, + 0xb1, 0x89, 0x5b, 0xc6, 0x4f, 0x9b, 0xd3, 0x74, 0x93, 0x88, 0xff, 0xaf, + 0x89, 0xff, 0xf4, 0x70, 0xcc, 0x75, 0x78, 0xbf, 0x92, 0xcd, 0xa9, 0xa8, + 0xd6, 0x6a, 0x6f, 0x7b, 0x7f, 0xd8, 0xa8, 0xb1, 0xe6, 0x32, 0x21, 0x70, + 0xa0, 0x9c, 0x6f, 0xc8, 0xc6, 0x59, 0x67, 0x93, 0x97, 0xca, 0x3f, 0xde, + 0xcb, 0x74, 0x7c, 0xb5, 0xa4, 0xd9, 0x66, 0xc6, 0x87, 0x98, 0xa5, 0xd0, + 0xbb, 0xb9, 0xc2, 0xb2, 0xaa, 0x79, 0x25, 0xb9, 0x6d, 0x5a, 0xc8, 0x7f, + 0x70, 0x85, 0x79, 0xbc, 0x6a, 0x9b, 0xd1, 0x9a, 0x9c, 0x51, 0x53, 0x71, + 0x89, 0xc0, 0xb4, 0xac, 0xae, 0x47, 0x67, 0x70, 0x79, 0xd2, 0x81, 0xa5, + 0xd2, 0x09, 0x38, 0x82, 0x74, 0xc9, 0x5d, 0xaf, 0xc1, 0x4f, 0x53, 0x99, + 0xcb, 0xb7, 0x3a, 0xba, 0xe8, 0x7f, 0x76, 0xb9, 0xb3, 0xd3, 0x60, 0xc0, + 0x93, 0x9f, 0x87, 0xbd, 0xd0, 0xb8, 0xca, 0xc1, 0xb6, 0x6c, 0x01, 0xc1, + 0x5c, 0x5d, 0xb2, 0x82, 0x76, 0x77, 0x39, 0xbc, 0x72, 0x6a, 0xc3, 0xb4, + 0x79, 0x21, 0x48, 0x42, 0x86, 0xa6, 0xbd, 0xaf, 0xae, 0x23, 0x9c, 0x69, + 0x78, 0xc3, 0x6b, 0xb3, 0xab, 0x43, 0xb2, 0x88, 0x71, 0xc6, 0x6b, 0xbe, + 0xc3, 0x75, 0xc2, 0xc3, 0xa5, 0xcf, 0x32, 0xbe, 0xcb, 0xb0, 0xb8, 0xc1, + 0x9c, 0xcf, 0x64, 0xc4, 0xb4, 0x96, 0xa8, 0xb9, 0xcb, 0xc0, 0xc0, 0xb8, + 0xb8, 0x77, 0x65, 0xc0, 0xc4, 0xb3, 0xc5, 0x77, 0x9b, 0x61, 0xd4, 0xac, + 0x7e, 0x36, 0xb1, 0xae, 0x36, 0x36, 0xb8, 0x39, 0x6b, 0x70, 0x9c, 0xb5, + 0x88, 0x5c, 0xb3, 0x6a, 0xad, 0xc5, 0x7b, 0xb4, 0xad, 0xaa, 0xc4, 0x84, + 0x5e, 0xc4, 0x67, 0xc1, 0xde, 0xba, 0xcf, 0xbd, 0xa0, 0xd3, 0x35, 0xb3, + 0xe7, 0xc8, 0xb8, 0xb8, 0xaf, 0xb4, 0x59, 0xb8, 0xb4, 0xac, 0xac, 0xaa, + 0xc7, 0xad, 0xc8, 0xb6, 0xac, 0x99, 0xa0, 0xcb, 0xc1, 0xc8, 0xcb, 0x89, + 0xc3, 0xac, 0xca, 0x8b, 0x97, 0x1f, 0xbd, 0xbf, 0x13, 0xad, 0xc8, 0x41, + 0x56, 0x3c, 0x86, 0xb2, 0x61, 0xc4, 0xbb, 0x71, 0xba, 
0x92, 0x8d, 0xc3, + 0x86, 0xcb, 0xc5, 0x8d, 0x88, 0xc8, 0x6a, 0xbf, 0x9c, 0xcd, 0xcd, 0xc0, + 0x81, 0xb1, 0x47, 0xb5, 0xf0, 0xce, 0xb1, 0xc1, 0xaa, 0xa8, 0x54, 0xcb, + 0xbc, 0xc7, 0xc5, 0x8e, 0xc3, 0xce, 0xc7, 0xb9, 0xb9, 0xa1, 0xc5, 0xbd, + 0xb8, 0xb8, 0xb7, 0x81, 0xb6, 0xba, 0xd2, 0x90, 0xbc, 0x96, 0xbe, 0xba, + 0x53, 0xb5, 0xc7, 0x3c, 0x3c, 0x1f, 0x90, 0xaa, 0x5a, 0xb8, 0xba, 0x7e, + 0xbc, 0x9e, 0xc2, 0xb1, 0x6e, 0xc0, 0xc4, 0x91, 0xf0, 0xb5, 0x60, 0xad, + 0x73, 0xba, 0xcd, 0xba, 0x6e, 0x94, 0x39, 0xb5, 0xe4, 0xbe, 0xb4, 0xb5, + 0xa0, 0xa9, 0x51, 0xac, 0xbc, 0xc2, 0xb3, 0x8a, 0xbd, 0x9a, 0xca, 0xb3, + 0xbf, 0xaf, 0xb5, 0x9a, 0xb9, 0xc3, 0xb6, 0x92, 0xb5, 0xc1, 0xb0, 0x95, + 0xd6, 0xcc, 0xbb, 0xbb, 0xa9, 0xb9, 0xac, 0x4a, 0x62, 0x27, 0xa7, 0xa7, + 0x30, 0xbd, 0xb1, 0x73, 0xa1, 0x74, 0xc2, 0xb7, 0x58, 0xc0, 0xae, 0x8f, + 0xe1, 0xac, 0x4e, 0xb0, 0x55, 0xc9, 0xc8, 0x9f, 0x83, 0x8e, 0x3e, 0xd5, + 0xb5, 0xbe, 0xcd, 0xb2, 0xa6, 0xc8, 0x64, 0xac, 0xc0, 0xc8, 0xaf, 0x99, + 0xc5, 0x9e, 0xb8, 0xbd, 0xa9, 0xc2, 0xb3, 0x81, 0xb4, 0xc2, 0xb4, 0x8f, + 0xbc, 0xb8, 0x9c, 0x88, 0xbe, 0xc6, 0xbf, 0xba, 0xc8, 0xb4, 0xab, 0x5b, + 0x92, 0x51, 0xb1, 0x9a, 0x44, 0xb9, 0xab, 0x80, 0xa5, 0x3e, 0xc0, 0xa5, + 0x5c, 0xb6, 0xa8, 0xa2, 0xb3, 0x9a, 0x6b, 0xb3, 0x34, 0xc6, 0x7e, 0x96, + 0xcb, 0x88, 0x48, 0xc6, 0xa3, 0xbb, 0xd2, 0xa2, 0xaf, 0xd0, 0x6e, 0xae, + 0xb4, 0xce, 0xc8, 0x8f, 0xd7, 0xad, 0xc8, 0xb0, 0xae, 0xb7, 0xb2, 0x70, + 0xb9, 0xad, 0xc1, 0xa0, 0xcb, 0xa2, 0xb0, 0x9b, 0xbe, 0xd3, 0xca, 0xb6, + 0xbd, 0xaf, 0xa9, 0x82, 0xa1, 0xd7, 0xbc, 0x9b, 0x8b, 0xac, 0xaa, 0xac, + 0xad, 0x37, 0xb7, 0xb6, 0x46, 0xae, 0xa9, 0xbd, 0x6b, 0x90, 0x5e, 0xcd, + 0x23, 0xa4, 0x76, 0xa1, 0xc4, 0x96, 0x50, 0xcc, 0x95, 0x99, 0x93, 0xa7, + 0xb2, 0xe1, 0x7c, 0xbd, 0xbd, 0xb5, 0xbf, 0x9a, 0xca, 0x80, 0xd7, 0xae, + 0x79, 0xa8, 0xaa, 0xb2, 0xbc, 0x51, 0xda, 0xa3, 0x80, 0x8b, 0xa2, 0xc8, + 0xd1, 0x94, 0xe1, 0xc4, 0xbd, 0xae, 0xae, 0xcc, 0xb3, 0xca, 0xd5, 0xa1, + 0xd5, 0xa7, 0xaf, 0xd2, 0xb4, 0x8d, 0xcc, 0xc8, 0x63, 
0xa3, 0xa4, 0xdf, + 0x6f, 0x7e, 0x98, 0xdf, 0x1b, 0x7b, 0x43, 0x99, 0xb0, 0x99, 0x71, 0xdb, + 0x63, 0x7b, 0x69, 0x9c, 0xba, 0xcd, 0x90, 0xd0, 0xb6, 0xa6, 0x9e, 0x95, + 0x50, 0xb6, 0xff, 0xff, 0xae, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc7, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, + 0xda, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0xc0, 0x44, 0x00, 0x00, + 0x2c, 0x30, 0x38, 0x5a, 0x3d, 0x4c, 0x44, 0x3b, 0x48, 0x48, 0x44, 0x57, + 0x3f, 0x43, 0x45, 0x3a, 0x24, 0x32, 0x21, 0x5c, 0x3f, 0x3a, 0x38, 0x3a, + 0x35, 0x35, 0x2f, 0x51, 0x3c, 0x3a, 0x45, 0x3a, 0x3b, 0x41, 0x39, 0x55, + 0x3c, 0x41, 0x39, 0x44, 0x3a, 0x40, 0x37, 0x48, 0x33, 0x47, 0x36, 0x3e, + 0x3c, 0x41, 0x3f, 0x3e, 0x3e, 0x47, 0x36, 0x3e, 0x41, 0x33, 0x3e, 0x3b, + 0x3a, 0x46, 0x45, 0x40, 0x48, 0x3a, 0x35, 0x4b, 0x45, 0x4d, 0x3c, 0x49, + 0x42, 0x44, 0x3c, 0x4c, 0x3e, 0x3c, 0x44, 0x32, 0x33, 0x41, 0x36, 0x4b, + 0x38, 0x3b, 0x3c, 0x38, 0x3b, 0x45, 0x34, 0x46, 0x40, 0x4e, 0x44, 0x35, + 0x43, 0x36, 0x3d, 0x40, 0x3e, 0x48, 0x40, 0x34, 0x3a, 0x46, 0x45, 0x43, + 0x45, 0x3f, 0x47, 0x37, 0x36, 0x35, 0x44, 0x3a, 0x3e, 0x37, 0x39, 0x40, + 0x3a, 0x3f, 0x3f, 0x4c, 0x3e, 0x41, 0x43, 0x35, 0x3f, 0x3d, 0x3d, 0x4c, + 0x3c, 0x4a, 0x46, 0x3c, 0x3a, 0x41, 0x40, 0x4e, 0x36, 0x47, 0x40, 0x3b, + 0x47, 0x42, 0x38, 0x4d, 0x48, 0x47, 0x3c, 0x3c, 0x33, 0x3b, 0x3e, 0x42, + 0x3f, 0x3e, 0x3a, 0x3d, 0x32, 0x39, 0x41, 0x46, 0x3a, 0x3a, 0x3e, 0x3e, + 0x47, 0x48, 0x4e, 0x36, 0x44, 0x40, 0x41, 0x45, 0x3a, 0x3c, 0x38, 0x55, + 0x2e, 0x26, 0x2f, 0x32, 0x3f, 0x41, 0x3e, 0x4c, 0x45, 0x36, 0x40, 0x31, + 0x17, 0x2e, 0x14, 0x53, 0x34, 0x30, 0x34, 0x3f, 0x2e, 0x44, 0x2b, 0x4e, + 0x34, 0x3e, 0x34, 0x43, 0x3d, 0x35, 0x3f, 0x46, 0x39, 0x40, 0x38, 0x3e, + 0x35, 0x3b, 0x35, 0x45, 0x3d, 0x40, 0x38, 0x37, 0x40, 0x3e, 0x32, 0x3e, + 0x41, 0x39, 0x30, 0x41, 0x3a, 0x32, 0x3e, 0x3d, 0x39, 
0x31, 0x33, 0x3e, + 0x41, 0x47, 0x40, 0x47, 0x35, 0x33, 0x3c, 0x32, 0x40, 0x3c, 0x42, 0x49, + 0x34, 0x38, 0x39, 0x37, 0x39, 0x35, 0x40, 0x4d, 0x37, 0x43, 0x42, 0x3e, + 0x3f, 0x3c, 0x3e, 0x51, 0x36, 0x37, 0x42, 0x41, 0x36, 0x31, 0x43, 0x3d, + 0x46, 0x43, 0x37, 0x46, 0x32, 0x45, 0x42, 0x36, 0x3f, 0x42, 0x42, 0x41, + 0x3d, 0x46, 0x39, 0x41, 0x3c, 0x3f, 0x38, 0x3c, 0x43, 0x43, 0x3d, 0x3c, + 0x3d, 0x41, 0x38, 0x42, 0x3a, 0x3d, 0x43, 0x42, 0x41, 0x40, 0x39, 0x36, + 0x3a, 0x3c, 0x3c, 0x4f, 0x44, 0x36, 0x39, 0x35, 0x46, 0x46, 0x36, 0x4a, + 0x3a, 0x42, 0x43, 0x39, 0x3f, 0x3d, 0x3c, 0x47, 0x38, 0x3f, 0x43, 0x40, + 0x36, 0x3c, 0x45, 0x3b, 0x33, 0x36, 0x3b, 0x39, 0x3c, 0x35, 0x40, 0x38, + 0x40, 0x3e, 0x3f, 0x48, 0x3f, 0x34, 0x40, 0x53, 0x26, 0x2c, 0x29, 0x39, + 0x2a, 0x38, 0x3f, 0x45, 0x32, 0x31, 0x4a, 0x37, 0x1c, 0x28, 0x09, 0x43, + 0x35, 0x3b, 0x33, 0x3c, 0x32, 0x3f, 0x28, 0x41, 0x36, 0x35, 0x3a, 0x37, + 0x41, 0x39, 0x32, 0x3c, 0x40, 0x3c, 0x3c, 0x32, 0x38, 0x39, 0x37, 0x44, + 0x3a, 0x33, 0x41, 0x36, 0x37, 0x3c, 0x35, 0x3a, 0x3d, 0x30, 0x3d, 0x41, + 0x37, 0x3c, 0x45, 0x3a, 0x37, 0x2f, 0x36, 0x3c, 0x3a, 0x3d, 0x39, 0x48, + 0x46, 0x33, 0x3a, 0x3e, 0x40, 0x3d, 0x3b, 0x52, 0x38, 0x45, 0x34, 0x47, + 0x39, 0x36, 0x37, 0x56, 0x42, 0x3f, 0x33, 0x36, 0x38, 0x3f, 0x40, 0x53, + 0x3e, 0x37, 0x3d, 0x3c, 0x48, 0x3a, 0x3d, 0x33, 0x39, 0x40, 0x3e, 0x35, + 0x3d, 0x46, 0x38, 0x36, 0x37, 0x43, 0x3a, 0x3c, 0x40, 0x38, 0x39, 0x3b, + 0x39, 0x3a, 0x42, 0x3d, 0x34, 0x3f, 0x35, 0x43, 0x3a, 0x35, 0x46, 0x3a, + 0x48, 0x38, 0x3b, 0x48, 0x3c, 0x35, 0x42, 0x3d, 0x3a, 0x3d, 0x38, 0x42, + 0x3e, 0x3c, 0x33, 0x39, 0x34, 0x30, 0x42, 0x44, 0x41, 0x3d, 0x3c, 0x39, + 0x3c, 0x3a, 0x39, 0x41, 0x3d, 0x44, 0x3c, 0x40, 0x3f, 0x3e, 0x42, 0x3f, + 0x37, 0x40, 0x39, 0x3b, 0x42, 0x43, 0x49, 0x37, 0x39, 0x46, 0x35, 0x3c, + 0x3e, 0x39, 0x45, 0x52, 0x24, 0x2d, 0x38, 0x35, 0x3a, 0x3a, 0x3c, 0x44, + 0x39, 0x32, 0x51, 0x3f, 0x16, 0x34, 0x0a, 0x49, 0x39, 0x38, 0x39, 0x3e, + 0x2f, 0x36, 0x24, 0x3f, 0x37, 0x34, 0x38, 0x3b, 0x34, 
0x34, 0x30, 0x3b, + 0x3d, 0x36, 0x35, 0x42, 0x33, 0x40, 0x37, 0x35, 0x43, 0x3f, 0x3f, 0x39, + 0x3a, 0x43, 0x36, 0x3e, 0x39, 0x3d, 0x3f, 0x3d, 0x47, 0x3b, 0x39, 0x37, + 0x35, 0x42, 0x3f, 0x3b, 0x41, 0x3a, 0x42, 0x4b, 0x3d, 0x3f, 0x3d, 0x3e, + 0x38, 0x3b, 0x34, 0x4e, 0x3f, 0x39, 0x36, 0x43, 0x39, 0x35, 0x41, 0x4d, + 0x3c, 0x39, 0x43, 0x33, 0x37, 0x3b, 0x41, 0x48, 0x3c, 0x3f, 0x39, 0x32, + 0x35, 0x3d, 0x42, 0x35, 0x3d, 0x3e, 0x37, 0x3b, 0x38, 0x3a, 0x44, 0x36, + 0x42, 0x35, 0x48, 0x40, 0x3a, 0x44, 0x44, 0x39, 0x43, 0x41, 0x3c, 0x37, + 0x47, 0x3b, 0x42, 0x42, 0x45, 0x3a, 0x40, 0x46, 0x35, 0x3f, 0x3a, 0x48, + 0x35, 0x44, 0x3f, 0x37, 0x33, 0x3e, 0x45, 0x49, 0x39, 0x43, 0x47, 0x37, + 0x3f, 0x3f, 0x3b, 0x44, 0x38, 0x3d, 0x39, 0x42, 0x37, 0x3e, 0x40, 0x45, + 0x3b, 0x3f, 0x40, 0x34, 0x42, 0x3f, 0x43, 0x3c, 0x43, 0x41, 0x38, 0x38, + 0x38, 0x41, 0x55, 0x33, 0x33, 0x39, 0x39, 0x3c, 0x35, 0x39, 0x38, 0x42, + 0x27, 0x26, 0x32, 0x41, 0x41, 0x32, 0x3f, 0x47, 0x3a, 0x38, 0x48, 0x37, + 0x11, 0x27, 0x08, 0x49, 0x35, 0x42, 0x3c, 0x2e, 0x34, 0x43, 0x25, 0x3b, + 0x3a, 0x33, 0x37, 0x30, 0x3c, 0x36, 0x2d, 0x3c, 0x3b, 0x39, 0x3b, 0x40, + 0x46, 0x3a, 0x30, 0x42, 0x35, 0x32, 0x36, 0x3a, 0x3a, 0x34, 0x34, 0x33, + 0x3d, 0x30, 0x3b, 0x42, 0x41, 0x3f, 0x3d, 0x3b, 0x44, 0x3d, 0x41, 0x41, + 0x3d, 0x3f, 0x40, 0x51, 0x42, 0x42, 0x36, 0x45, 0x30, 0x40, 0x32, 0x4f, + 0x3a, 0x3c, 0x40, 0x39, 0x3d, 0x3b, 0x3e, 0x4b, 0x3d, 0x37, 0x42, 0x46, + 0x40, 0x40, 0x47, 0x3d, 0x35, 0x3c, 0x3f, 0x46, 0x37, 0x37, 0x3a, 0x2e, + 0x3d, 0x3c, 0x3a, 0x46, 0x3a, 0x44, 0x3c, 0x3a, 0x32, 0x44, 0x31, 0x41, + 0x43, 0x36, 0x49, 0x39, 0x3d, 0x37, 0x3f, 0x41, 0x3b, 0x3b, 0x3c, 0x42, + 0x3c, 0x34, 0x3f, 0x3b, 0x40, 0x3e, 0x48, 0x47, 0x3e, 0x3c, 0x38, 0x39, + 0x3f, 0x35, 0x39, 0x3f, 0x3e, 0x3e, 0x3b, 0x43, 0x41, 0x40, 0x43, 0x41, + 0x3f, 0x37, 0x39, 0x41, 0x46, 0x32, 0x3d, 0x41, 0x36, 0x3f, 0x3e, 0x3f, + 0x36, 0x48, 0x43, 0x3d, 0x43, 0x3f, 0x34, 0x3d, 0x34, 0x35, 0x4f, 0x32, + 0x3c, 0x3f, 0x3d, 0x3f, 0x39, 0x3c, 0x3d, 0x47, 0x23, 
0x36, 0x33, 0x45, + 0x37, 0x2e, 0x42, 0x42, 0x39, 0x34, 0x4f, 0x3f, 0x19, 0x2b, 0x01, 0x50, + 0x35, 0x3f, 0x37, 0x3c, 0x33, 0x35, 0x25, 0x32, 0x38, 0x3e, 0x40, 0x40, + 0x2f, 0x38, 0x35, 0x3d, 0x31, 0x42, 0x44, 0x3c, 0x3a, 0x3d, 0x2d, 0x3e, + 0x3b, 0x3e, 0x3d, 0x31, 0x3b, 0x37, 0x35, 0x31, 0x36, 0x35, 0x34, 0x31, + 0x41, 0x3a, 0x33, 0x32, 0x3c, 0x31, 0x3e, 0x3d, 0x40, 0x3b, 0x34, 0x45, + 0x36, 0x39, 0x3e, 0x3f, 0x3c, 0x45, 0x37, 0x4b, 0x42, 0x3d, 0x33, 0x43, + 0x3e, 0x40, 0x35, 0x4e, 0x38, 0x36, 0x3a, 0x33, 0x38, 0x44, 0x3f, 0x3c, + 0x3f, 0x40, 0x3a, 0x3c, 0x3c, 0x3c, 0x44, 0x29, 0x3a, 0x40, 0x35, 0x3a, + 0x3d, 0x48, 0x3b, 0x30, 0x45, 0x41, 0x45, 0x40, 0x37, 0x32, 0x3a, 0x35, + 0x3f, 0x38, 0x3b, 0x43, 0x3b, 0x3f, 0x33, 0x40, 0x3b, 0x40, 0x38, 0x33, + 0x39, 0x3c, 0x3c, 0x3f, 0x43, 0x33, 0x43, 0x40, 0x43, 0x3d, 0x33, 0x42, + 0x40, 0x32, 0x3e, 0x36, 0x40, 0x38, 0x43, 0x40, 0x44, 0x38, 0x34, 0x3c, + 0x3e, 0x39, 0x47, 0x43, 0x40, 0x3b, 0x3f, 0x3f, 0x3c, 0x3b, 0x4b, 0x33, + 0x36, 0x49, 0x32, 0x41, 0x48, 0x45, 0x57, 0x3a, 0x40, 0x42, 0x40, 0x46, + 0x36, 0x35, 0x3c, 0x46, 0x22, 0x2e, 0x33, 0x3e, 0x3c, 0x39, 0x44, 0x4d, + 0x3f, 0x41, 0x51, 0x44, 0x15, 0x2e, 0x02, 0x4e, 0x39, 0x3a, 0x3c, 0x35, + 0x30, 0x38, 0x1e, 0x31, 0x40, 0x3b, 0x39, 0x3d, 0x3a, 0x37, 0x35, 0x36, + 0x46, 0x36, 0x3c, 0x3e, 0x39, 0x3e, 0x32, 0x40, 0x3b, 0x35, 0x42, 0x41, + 0x41, 0x38, 0x41, 0x35, 0x42, 0x36, 0x3c, 0x42, 0x3d, 0x41, 0x35, 0x31, + 0x3f, 0x44, 0x3e, 0x41, 0x3f, 0x35, 0x42, 0x4b, 0x3e, 0x36, 0x37, 0x34, + 0x36, 0x3d, 0x40, 0x49, 0x41, 0x3e, 0x3d, 0x3b, 0x38, 0x37, 0x40, 0x47, + 0x35, 0x32, 0x43, 0x38, 0x36, 0x3b, 0x33, 0x47, 0x33, 0x34, 0x3d, 0x47, + 0x3c, 0x37, 0x3d, 0x2b, 0x3a, 0x36, 0x3b, 0x3d, 0x43, 0x38, 0x35, 0x32, + 0x32, 0x37, 0x43, 0x36, 0x3f, 0x48, 0x38, 0x30, 0x3a, 0x3c, 0x42, 0x34, + 0x37, 0x3c, 0x37, 0x40, 0x48, 0x3e, 0x35, 0x3b, 0x3f, 0x38, 0x39, 0x3e, + 0x37, 0x35, 0x36, 0x3d, 0x3b, 0x3c, 0x40, 0x3d, 0x34, 0x40, 0x46, 0x42, + 0x3f, 0x3c, 0x3c, 0x3e, 0x40, 0x40, 0x3d, 0x3f, 0x3f, 
0x44, 0x46, 0x41, + 0x32, 0x43, 0x40, 0x41, 0x3c, 0x42, 0x39, 0x38, 0x48, 0x44, 0x3d, 0x38, + 0x34, 0x40, 0x4e, 0x31, 0x3c, 0x42, 0x39, 0x48, 0x3c, 0x33, 0x3e, 0x40, + 0x20, 0x27, 0x39, 0x45, 0x45, 0x36, 0x47, 0x4c, 0x35, 0x3e, 0x4a, 0x36, + 0x16, 0x2f, 0x04, 0x4f, 0x3a, 0x35, 0x36, 0x3a, 0x2d, 0x36, 0x21, 0x34, + 0x3b, 0x32, 0x3d, 0x3c, 0x3c, 0x3f, 0x3b, 0x3b, 0x41, 0x46, 0x40, 0x3d, + 0x3b, 0x44, 0x33, 0x42, 0x34, 0x33, 0x3e, 0x45, 0x3f, 0x46, 0x39, 0x33, + 0x3b, 0x37, 0x37, 0x37, 0x42, 0x47, 0x3c, 0x35, 0x31, 0x41, 0x44, 0x3a, + 0x3b, 0x33, 0x39, 0x44, 0x42, 0x33, 0x3d, 0x3f, 0x43, 0x33, 0x41, 0x4a, + 0x35, 0x46, 0x36, 0x3e, 0x39, 0x41, 0x41, 0x4c, 0x34, 0x3d, 0x38, 0x33, + 0x3c, 0x3f, 0x43, 0x44, 0x37, 0x35, 0x35, 0x3c, 0x43, 0x34, 0x3e, 0x2d, + 0x3f, 0x35, 0x38, 0x3c, 0x33, 0x35, 0x43, 0x2a, 0x40, 0x33, 0x34, 0x40, + 0x3d, 0x38, 0x36, 0x2d, 0x36, 0x3c, 0x43, 0x3d, 0x37, 0x3d, 0x39, 0x38, + 0x3b, 0x3e, 0x3c, 0x46, 0x35, 0x35, 0x43, 0x44, 0x39, 0x40, 0x34, 0x39, + 0x3d, 0x34, 0x40, 0x45, 0x38, 0x35, 0x3e, 0x39, 0x3c, 0x44, 0x48, 0x44, + 0x41, 0x3e, 0x3c, 0x45, 0x3a, 0x3c, 0x3c, 0x46, 0x3a, 0x40, 0x39, 0x43, + 0x35, 0x35, 0x3e, 0x45, 0x3a, 0x34, 0x3c, 0x39, 0x46, 0x3a, 0x4f, 0x35, + 0x32, 0x3d, 0x36, 0x41, 0x32, 0x38, 0x3f, 0x45, 0x2d, 0x34, 0x2a, 0x35, + 0x43, 0x3f, 0x41, 0x49, 0x41, 0x3c, 0x4b, 0x3f, 0x17, 0x31, 0x02, 0x4f, + 0x30, 0x38, 0x39, 0x40, 0x33, 0x3a, 0x25, 0x38, 0x35, 0x3c, 0x39, 0x35, + 0x34, 0x41, 0x34, 0x43, 0x40, 0x40, 0x46, 0x3d, 0x40, 0x38, 0x3f, 0x3b, + 0x35, 0x39, 0x3c, 0x39, 0x34, 0x38, 0x3f, 0x36, 0x3a, 0x38, 0x44, 0x3f, + 0x3f, 0x38, 0x3c, 0x33, 0x41, 0x42, 0x38, 0x33, 0x3c, 0x3b, 0x3c, 0x46, + 0x38, 0x3b, 0x3f, 0x33, 0x3f, 0x48, 0x3b, 0x49, 0x3f, 0x3a, 0x3d, 0x3f, + 0x47, 0x3d, 0x30, 0x45, 0x36, 0x42, 0x3d, 0x36, 0x43, 0x38, 0x3b, 0x3d, + 0x3c, 0x30, 0x3b, 0x43, 0x3d, 0x41, 0x34, 0x2e, 0x43, 0x3d, 0x43, 0x46, + 0x43, 0x3c, 0x3c, 0x2e, 0x3c, 0x43, 0x34, 0x43, 0x3e, 0x43, 0x3f, 0x2b, + 0x45, 0x40, 0x3a, 0x43, 0x36, 0x39, 0x3f, 0x3d, 0x3a, 
0x3c, 0x35, 0x3b, + 0x36, 0x3f, 0x45, 0x3e, 0x45, 0x40, 0x3f, 0x36, 0x45, 0x42, 0x35, 0x3e, + 0x3a, 0x3a, 0x3f, 0x40, 0x3e, 0x3c, 0x39, 0x46, 0x43, 0x3e, 0x3f, 0x3f, + 0x40, 0x3c, 0x40, 0x4b, 0x41, 0x35, 0x3b, 0x3e, 0x49, 0x32, 0x3e, 0x41, + 0x31, 0x37, 0x3d, 0x3b, 0x3f, 0x45, 0x50, 0x3a, 0x3f, 0x3c, 0x44, 0x36, + 0x43, 0x37, 0x3d, 0x4b, 0x29, 0x39, 0x2f, 0x38, 0x45, 0x36, 0x40, 0x4e, + 0x39, 0x3f, 0x48, 0x43, 0x23, 0x3c, 0x06, 0x51, 0x37, 0x3b, 0x3e, 0x3b, + 0x28, 0x45, 0x2b, 0x37, 0x3f, 0x33, 0x3f, 0x41, 0x31, 0x36, 0x33, 0x3a, + 0x3a, 0x35, 0x3b, 0x33, 0x3e, 0x36, 0x35, 0x40, 0x3a, 0x34, 0x3a, 0x38, + 0x34, 0x3a, 0x3a, 0x34, 0x42, 0x45, 0x40, 0x3e, 0x40, 0x38, 0x39, 0x34, + 0x38, 0x37, 0x3f, 0x3e, 0x3c, 0x32, 0x3f, 0x46, 0x3f, 0x44, 0x3b, 0x3e, + 0x44, 0x45, 0x36, 0x3e, 0x36, 0x3f, 0x3b, 0x40, 0x39, 0x34, 0x38, 0x41, + 0x42, 0x3e, 0x3d, 0x47, 0x3e, 0x45, 0x33, 0x40, 0x3e, 0x3a, 0x44, 0x3d, + 0x3c, 0x3a, 0x3a, 0x2c, 0x3a, 0x3d, 0x35, 0x45, 0x3c, 0x41, 0x36, 0x30, + 0x32, 0x32, 0x3a, 0x3b, 0x35, 0x3c, 0x43, 0x2d, 0x35, 0x3f, 0x41, 0x37, + 0x3f, 0x46, 0x34, 0x39, 0x3c, 0x43, 0x40, 0x3e, 0x3e, 0x36, 0x3e, 0x3c, + 0x37, 0x3a, 0x3d, 0x3a, 0x3c, 0x38, 0x44, 0x41, 0x3f, 0x3b, 0x3c, 0x47, + 0x40, 0x3b, 0x41, 0x47, 0x3e, 0x45, 0x39, 0x3e, 0x37, 0x45, 0x4b, 0x4c, + 0x37, 0x37, 0x37, 0x3c, 0x3c, 0x3d, 0x40, 0x38, 0x39, 0x3e, 0x43, 0x3f, + 0x38, 0x45, 0x51, 0x3c, 0x31, 0x34, 0x3b, 0x48, 0x46, 0x41, 0x40, 0x40, + 0x2c, 0x39, 0x32, 0x42, 0x3c, 0x2e, 0x49, 0x4d, 0x3c, 0x3f, 0x45, 0x38, + 0x20, 0x38, 0x03, 0x55, 0x33, 0x3e, 0x32, 0x39, 0x32, 0x3b, 0x24, 0x2b, + 0x42, 0x35, 0x45, 0x32, 0x2e, 0x3b, 0x2f, 0x3f, 0x3c, 0x37, 0x39, 0x3b, + 0x34, 0x34, 0x3d, 0x36, 0x3d, 0x39, 0x3b, 0x30, 0x3c, 0x3e, 0x40, 0x32, + 0x3d, 0x3c, 0x3c, 0x3e, 0x33, 0x33, 0x3f, 0x3a, 0x33, 0x3e, 0x46, 0x36, + 0x3a, 0x3d, 0x40, 0x40, 0x3f, 0x41, 0x3a, 0x42, 0x34, 0x32, 0x34, 0x46, + 0x3b, 0x31, 0x40, 0x37, 0x37, 0x32, 0x3e, 0x47, 0x3f, 0x3b, 0x3e, 0x43, + 0x49, 0x45, 0x3a, 0x3d, 0x3e, 0x44, 0x40, 0x31, 0x39, 
0x3e, 0x3b, 0x2d, + 0x3b, 0x3a, 0x33, 0x3d, 0x39, 0x37, 0x3e, 0x32, 0x41, 0x3c, 0x3a, 0x37, + 0x3b, 0x40, 0x39, 0x2f, 0x3e, 0x3f, 0x47, 0x32, 0x3e, 0x3b, 0x3e, 0x3e, + 0x40, 0x3e, 0x40, 0x3c, 0x41, 0x39, 0x38, 0x46, 0x45, 0x32, 0x47, 0x31, + 0x36, 0x47, 0x37, 0x49, 0x3a, 0x3f, 0x47, 0x3a, 0x41, 0x3b, 0x3c, 0x4f, + 0x3e, 0x36, 0x3b, 0x47, 0x35, 0x39, 0x41, 0x4e, 0x3d, 0x3e, 0x3b, 0x46, + 0x38, 0x39, 0x3b, 0x45, 0x3e, 0x3f, 0x44, 0x42, 0x44, 0x3f, 0x55, 0x3b, + 0x41, 0x3d, 0x43, 0x43, 0x37, 0x3f, 0x3d, 0x4c, 0x28, 0x3d, 0x36, 0x3c, + 0x3e, 0x3e, 0x48, 0x50, 0x3e, 0x39, 0x45, 0x41, 0x22, 0x37, 0x07, 0x4f, + 0x2e, 0x33, 0x38, 0x3f, 0x31, 0x3a, 0x1b, 0x36, 0x34, 0x38, 0x3c, 0x37, + 0x37, 0x3e, 0x36, 0x35, 0x36, 0x3b, 0x3d, 0x38, 0x42, 0x48, 0x3d, 0x40, + 0x40, 0x44, 0x3d, 0x39, 0x37, 0x3b, 0x3d, 0x33, 0x3d, 0x35, 0x42, 0x3c, + 0x39, 0x3e, 0x43, 0x2d, 0x3c, 0x40, 0x43, 0x43, 0x45, 0x35, 0x3c, 0x44, + 0x34, 0x3c, 0x3d, 0x31, 0x39, 0x40, 0x39, 0x3d, 0x3e, 0x34, 0x3e, 0x3b, + 0x40, 0x38, 0x42, 0x4a, 0x40, 0x3b, 0x35, 0x3d, 0x36, 0x38, 0x35, 0x42, + 0x3c, 0x3c, 0x3d, 0x3b, 0x38, 0x39, 0x45, 0x28, 0x3a, 0x37, 0x37, 0x35, + 0x3a, 0x3d, 0x35, 0x2a, 0x3c, 0x3f, 0x37, 0x34, 0x37, 0x3f, 0x3e, 0x2b, + 0x39, 0x43, 0x3b, 0x45, 0x35, 0x36, 0x36, 0x42, 0x33, 0x38, 0x3b, 0x35, + 0x31, 0x3f, 0x41, 0x41, 0x3c, 0x41, 0x45, 0x42, 0x3b, 0x3c, 0x39, 0x46, + 0x3c, 0x3e, 0x3a, 0x41, 0x39, 0x3d, 0x41, 0x4b, 0x40, 0x3f, 0x43, 0x3d, + 0x39, 0x39, 0x44, 0x44, 0x37, 0x42, 0x3f, 0x44, 0x3e, 0x37, 0x42, 0x35, + 0x44, 0x3f, 0x40, 0x42, 0x3f, 0x3a, 0x47, 0x3d, 0x38, 0x3a, 0x3b, 0x3a, + 0x42, 0x36, 0x3a, 0x97, 0x32, 0x31, 0x30, 0x36, 0x47, 0x3e, 0x46, 0x51, + 0x42, 0x34, 0x50, 0x34, 0x26, 0x3b, 0x06, 0x55, 0x3c, 0x3b, 0x2d, 0x3a, + 0x37, 0x37, 0x1b, 0x32, 0x39, 0x3d, 0x36, 0x40, 0x3b, 0x3f, 0x33, 0x33, + 0x3d, 0x37, 0x35, 0x37, 0x44, 0x3f, 0x35, 0x39, 0x33, 0x3c, 0x43, 0x39, + 0x3f, 0x42, 0x3e, 0x34, 0x38, 0x38, 0x39, 0x3c, 0x48, 0x3c, 0x2f, 0x30, + 0x40, 0x3c, 0x41, 0x3e, 0x3f, 0x3e, 0x36, 0x43, 0x40, 
0x3c, 0x36, 0x43, + 0x43, 0x38, 0x3a, 0x47, 0x3e, 0x37, 0x39, 0x3a, 0x43, 0x45, 0x38, 0x43, + 0x3b, 0x45, 0x37, 0x44, 0x36, 0x45, 0x3a, 0x3e, 0x3e, 0x3e, 0x3d, 0x33, + 0x39, 0x36, 0x48, 0x33, 0x30, 0x42, 0x33, 0x39, 0x37, 0x3a, 0x3f, 0x34, + 0x34, 0x40, 0x40, 0x40, 0x3f, 0x3d, 0x3f, 0x33, 0x41, 0x40, 0x3b, 0x43, + 0x3b, 0x3a, 0x40, 0x3a, 0x38, 0x3e, 0x38, 0x3b, 0x38, 0x42, 0x40, 0x40, + 0x41, 0x35, 0x37, 0x38, 0x3b, 0x3c, 0x39, 0x4b, 0x32, 0x39, 0x42, 0x3c, + 0x36, 0x3d, 0x32, 0x52, 0x3a, 0x31, 0x40, 0x40, 0x3a, 0x43, 0x3d, 0x46, + 0x3c, 0x3e, 0x3e, 0x33, 0x3f, 0x41, 0x4d, 0x37, 0x39, 0x39, 0x3e, 0x3b, + 0x40, 0x39, 0x53, 0x2d, 0x46, 0x3c, 0x32, 0x42, 0x3d, 0x40, 0x40, 0x4d, + 0x2e, 0x34, 0x39, 0x3b, 0x46, 0x3b, 0x42, 0x4f, 0x3d, 0x39, 0x4e, 0x36, + 0x1a, 0x31, 0x0e, 0x56, 0x36, 0x42, 0x38, 0x44, 0x36, 0x3a, 0x20, 0x30, + 0x36, 0x34, 0x37, 0x38, 0x40, 0x41, 0x2a, 0x35, 0x3b, 0x3b, 0x3a, 0x38, + 0x33, 0x39, 0x36, 0x41, 0x43, 0x39, 0x35, 0x3d, 0x37, 0x3d, 0x33, 0x31, + 0x45, 0x33, 0x3f, 0x3b, 0x44, 0x38, 0x39, 0x34, 0x38, 0x39, 0x38, 0x3d, + 0x3a, 0x3a, 0x41, 0x40, 0x44, 0x3e, 0x3f, 0x45, 0x34, 0x31, 0x34, 0x43, + 0x3b, 0x34, 0x42, 0x3c, 0x3c, 0x43, 0x35, 0x45, 0x36, 0x38, 0x3d, 0x3c, + 0x3f, 0x3d, 0x3e, 0x45, 0x41, 0x43, 0x35, 0x3f, 0x40, 0x3f, 0x3a, 0x34, + 0x3d, 0x32, 0x41, 0x3d, 0x48, 0x42, 0x37, 0x2a, 0x3c, 0x3a, 0x3e, 0x49, + 0x38, 0x36, 0x38, 0x2e, 0x36, 0x37, 0x34, 0x3e, 0x3c, 0x43, 0x43, 0x39, + 0x39, 0x3b, 0x44, 0x46, 0x44, 0x43, 0x37, 0x46, 0x43, 0x34, 0x3b, 0x35, + 0x42, 0x41, 0x3f, 0x3d, 0x3d, 0x3a, 0x42, 0x3e, 0x38, 0x47, 0x3d, 0x49, + 0x45, 0x49, 0x3a, 0x3c, 0x3e, 0x37, 0x40, 0x46, 0x41, 0x33, 0x45, 0x36, + 0x37, 0x44, 0x49, 0x3b, 0x44, 0x40, 0x33, 0x46, 0x37, 0x39, 0x4e, 0x3a, + 0x43, 0x38, 0x3a, 0x42, 0x3a, 0x3d, 0x45, 0x50, 0x26, 0x34, 0x3b, 0x3c, + 0x46, 0x46, 0x4c, 0x54, 0x3f, 0x35, 0x4e, 0x47, 0x21, 0x39, 0x0e, 0x54, + 0x3a, 0x3a, 0x2f, 0x40, 0x2d, 0x3a, 0x1f, 0x31, 0x31, 0x42, 0x34, 0x45, + 0x37, 0x36, 0x30, 0x3b, 0x3a, 0x3a, 0x36, 0x40, 0x32, 
0x36, 0x3c, 0x3c, + 0x37, 0x42, 0x35, 0x3e, 0x39, 0x47, 0x36, 0x32, 0x41, 0x30, 0x42, 0x39, + 0x39, 0x44, 0x37, 0x30, 0x41, 0x3b, 0x3d, 0x3d, 0x43, 0x3b, 0x38, 0x45, + 0x3b, 0x3a, 0x39, 0x3a, 0x31, 0x33, 0x43, 0x46, 0x3f, 0x41, 0x44, 0x3f, + 0x3b, 0x44, 0x3a, 0x4c, 0x33, 0x33, 0x33, 0x3e, 0x37, 0x3e, 0x45, 0x45, + 0x36, 0x42, 0x3e, 0x43, 0x40, 0x34, 0x36, 0x31, 0x38, 0x34, 0x41, 0x3b, + 0x32, 0x38, 0x3e, 0x29, 0x47, 0x33, 0x37, 0x45, 0x3c, 0x3d, 0x43, 0x2c, + 0x36, 0x3a, 0x3c, 0x40, 0x3d, 0x46, 0x3c, 0x37, 0x40, 0x44, 0x37, 0x38, + 0x3e, 0x41, 0x3c, 0x40, 0x33, 0x3f, 0x44, 0x32, 0x44, 0x3a, 0x43, 0x42, + 0x3e, 0x38, 0x44, 0x3b, 0x41, 0x48, 0x3f, 0x4e, 0x3f, 0x44, 0x35, 0x45, + 0x34, 0x3f, 0x42, 0x4b, 0x37, 0x37, 0x3e, 0x45, 0x46, 0x45, 0x46, 0x3d, + 0x3e, 0x39, 0x3b, 0x3a, 0x46, 0x3a, 0x56, 0x35, 0x46, 0x3d, 0x40, 0x3b, + 0x36, 0x39, 0x3f, 0x54, 0x27, 0x2b, 0x34, 0x3c, 0x48, 0x3d, 0x49, 0x4c, + 0x3e, 0x3d, 0x4e, 0x42, 0x25, 0x3b, 0x10, 0x4d, 0x30, 0x36, 0x3e, 0x36, + 0x2e, 0x31, 0x1d, 0x37, 0x3a, 0x39, 0x33, 0x3f, 0x39, 0x38, 0x2e, 0x36, + 0x44, 0x3e, 0x41, 0x37, 0x3b, 0x30, 0x3b, 0x48, 0x31, 0x39, 0x41, 0x3e, + 0x37, 0x37, 0x34, 0x2f, 0x35, 0x3b, 0x3a, 0x3e, 0x45, 0x3e, 0x3f, 0x35, + 0x39, 0x39, 0x3b, 0x44, 0x43, 0x3c, 0x3e, 0x46, 0x40, 0x3a, 0x36, 0x45, + 0x41, 0x40, 0x36, 0x44, 0x3a, 0x37, 0x47, 0x47, 0x3d, 0x36, 0x43, 0x4e, + 0x3b, 0x38, 0x40, 0x48, 0x44, 0x43, 0x45, 0x3f, 0x43, 0x3c, 0x3b, 0x37, + 0x43, 0x41, 0x39, 0x2f, 0x3d, 0x45, 0x3e, 0x3e, 0x42, 0x40, 0x41, 0x2f, + 0x47, 0x38, 0x3a, 0x48, 0x3e, 0x35, 0x37, 0x2a, 0x34, 0x38, 0x41, 0x3b, + 0x3d, 0x37, 0x3b, 0x35, 0x38, 0x3e, 0x41, 0x3c, 0x41, 0x43, 0x3d, 0x46, + 0x47, 0x47, 0x3d, 0x35, 0x48, 0x41, 0x3d, 0x3e, 0x34, 0x47, 0x38, 0x38, + 0x39, 0x3e, 0x38, 0x4d, 0x43, 0x36, 0x42, 0x40, 0x3e, 0x41, 0x3f, 0x4c, + 0x3e, 0x3e, 0x37, 0x44, 0x3e, 0x3b, 0x47, 0x3e, 0x3f, 0x3b, 0x39, 0x3c, + 0x3c, 0x3c, 0x53, 0x3b, 0x3b, 0x32, 0x3e, 0x3f, 0x32, 0x3c, 0x37, 0x4b, + 0x33, 0x30, 0x2f, 0x41, 0x47, 0x42, 0x49, 0x4f, 0x3b, 
0x42, 0x4c, 0x44, + 0x1f, 0x37, 0x16, 0x4e, 0x3b, 0x3f, 0x30, 0x36, 0x35, 0x38, 0x26, 0x36, + 0x32, 0x3b, 0x38, 0x3c, 0x30, 0x3e, 0x34, 0x3e, 0x3d, 0x34, 0x39, 0x3c, + 0x36, 0x47, 0x34, 0x41, 0x31, 0x39, 0x44, 0x3e, 0x39, 0x41, 0x32, 0x36, + 0x3b, 0x3f, 0x32, 0x3d, 0x36, 0x3e, 0x40, 0x3d, 0x45, 0x32, 0x45, 0x42, + 0x38, 0x43, 0x40, 0x42, 0x34, 0x3a, 0x43, 0x38, 0x47, 0x3f, 0x41, 0x47, + 0x34, 0x44, 0x41, 0x39, 0x3c, 0x46, 0x36, 0x4f, 0x41, 0x3e, 0x38, 0x38, + 0x3a, 0x3b, 0x43, 0x44, 0x37, 0x3f, 0x35, 0x43, 0x34, 0x3d, 0x40, 0x32, + 0x3a, 0x3b, 0x3d, 0x34, 0x35, 0x43, 0x31, 0x2c, 0x3b, 0x36, 0x38, 0x41, + 0x3c, 0x38, 0x3d, 0x31, 0x45, 0x46, 0x42, 0x41, 0x33, 0x3f, 0x3f, 0x3a, + 0x36, 0x3f, 0x3c, 0x3c, 0x3c, 0x3e, 0x39, 0x3e, 0x40, 0x37, 0x47, 0x3e, + 0x35, 0x39, 0x3d, 0x3d, 0x37, 0x36, 0x3e, 0x45, 0x38, 0x3d, 0x45, 0x43, + 0x3a, 0x32, 0x3b, 0x3a, 0x32, 0x3c, 0x3d, 0x43, 0x3d, 0x33, 0x3b, 0x3d, + 0x46, 0x3a, 0x44, 0x45, 0x3b, 0x3e, 0x3c, 0x42, 0x37, 0x37, 0x52, 0x2a, + 0x3a, 0x35, 0x35, 0x3f, 0x40, 0x38, 0x40, 0x5b, 0x35, 0x32, 0x2b, 0x3d, + 0x4a, 0x3c, 0x46, 0x56, 0x44, 0x30, 0x4d, 0x39, 0x20, 0x32, 0x0f, 0x4f, + 0x33, 0x3c, 0x35, 0x35, 0x3a, 0x45, 0x29, 0x3b, 0x31, 0x38, 0x34, 0x38, + 0x42, 0x45, 0x37, 0x3e, 0x37, 0x2e, 0x36, 0x43, 0x3f, 0x38, 0x2f, 0x41, + 0x3f, 0x41, 0x3c, 0x31, 0x37, 0x36, 0x37, 0x39, 0x41, 0x3a, 0x3a, 0x40, + 0x3e, 0x47, 0x3d, 0x37, 0x3c, 0x38, 0x35, 0x39, 0x3a, 0x43, 0x3f, 0x42, + 0x42, 0x38, 0x3e, 0x40, 0x3c, 0x3a, 0x45, 0x48, 0x37, 0x3a, 0x3e, 0x35, + 0x3a, 0x3d, 0x45, 0x4a, 0x3d, 0x37, 0x38, 0x3a, 0x3d, 0x46, 0x46, 0x41, + 0x37, 0x41, 0x40, 0x48, 0x37, 0x34, 0x3b, 0x2c, 0x39, 0x34, 0x37, 0x35, + 0x3a, 0x43, 0x39, 0x2e, 0x39, 0x3f, 0x40, 0x3e, 0x40, 0x40, 0x3c, 0x2d, + 0x3e, 0x3c, 0x37, 0x39, 0x3c, 0x3b, 0x3d, 0x3f, 0x41, 0x48, 0x3b, 0x3d, + 0x3b, 0x41, 0x45, 0x3e, 0x3a, 0x38, 0x3f, 0x3c, 0x3d, 0x3e, 0x40, 0x42, + 0x46, 0x38, 0x43, 0x34, 0x35, 0x47, 0x3d, 0x46, 0x3f, 0x3e, 0x32, 0x3f, + 0x3e, 0x3d, 0x47, 0x46, 0x38, 0x41, 0x45, 0x3f, 0x34, 
0x3f, 0x41, 0x43, + 0x3e, 0x3e, 0x44, 0x3b, 0x3b, 0x36, 0x51, 0x32, 0x37, 0x3c, 0x42, 0x43, + 0x33, 0x39, 0x42, 0x61, 0x2c, 0x3b, 0x2e, 0x39, 0x42, 0x39, 0x42, 0x54, + 0x3c, 0x3a, 0x48, 0x35, 0x26, 0x34, 0x15, 0x51, 0x35, 0x40, 0x36, 0x3c, + 0x2d, 0x37, 0x25, 0x38, 0x33, 0x3d, 0x3d, 0x39, 0x3e, 0x3b, 0x2e, 0x4b, + 0x3d, 0x3b, 0x42, 0x37, 0x37, 0x40, 0x37, 0x40, 0x35, 0x45, 0x37, 0x37, + 0x3f, 0x41, 0x36, 0x39, 0x3c, 0x32, 0x3e, 0x38, 0x41, 0x40, 0x3e, 0x3f, + 0x3b, 0x3c, 0x43, 0x35, 0x3e, 0x3d, 0x44, 0x44, 0x3a, 0x36, 0x39, 0x3f, + 0x3a, 0x31, 0x42, 0x4d, 0x40, 0x33, 0x40, 0x45, 0x44, 0x3d, 0x40, 0x49, + 0x41, 0x3f, 0x42, 0x3a, 0x34, 0x46, 0x38, 0x46, 0x42, 0x34, 0x3a, 0x40, + 0x40, 0x41, 0x3d, 0x32, 0x35, 0x48, 0x35, 0x3e, 0x44, 0x41, 0x40, 0x2c, + 0x46, 0x38, 0x38, 0x3f, 0x36, 0x40, 0x38, 0x2a, 0x43, 0x41, 0x3e, 0x35, + 0x46, 0x3a, 0x45, 0x46, 0x46, 0x42, 0x3a, 0x3b, 0x40, 0x38, 0x35, 0x43, + 0x38, 0x3d, 0x3b, 0x41, 0x36, 0x44, 0x3f, 0x3f, 0x34, 0x3e, 0x3c, 0x3d, + 0x49, 0x36, 0x37, 0x4b, 0x38, 0x3c, 0x43, 0x37, 0x3a, 0x3f, 0x31, 0x45, + 0x3b, 0x39, 0x3f, 0x40, 0x37, 0x3c, 0x42, 0x3f, 0x3c, 0x33, 0x40, 0x3b, + 0x32, 0x3c, 0x52, 0x31, 0x3d, 0x44, 0x3b, 0x31, 0x46, 0x38, 0x40, 0x60, + 0x2b, 0x3c, 0x37, 0x34, 0x43, 0x38, 0x45, 0x57, 0x37, 0x39, 0x49, 0x33, + 0x2d, 0x3f, 0x18, 0x4e, 0x39, 0x39, 0x32, 0x3b, 0x34, 0x3b, 0x2c, 0x45, + 0x33, 0x37, 0x45, 0x42, 0x3d, 0x37, 0x2a, 0x4c, 0x3d, 0x3f, 0x3c, 0x36, + 0x37, 0x3c, 0x39, 0x47, 0x3d, 0x44, 0x3d, 0x40, 0x3d, 0x41, 0x34, 0x3e, + 0x40, 0x34, 0x3b, 0x3a, 0x41, 0x36, 0x37, 0x40, 0x3e, 0x3f, 0x3a, 0x36, + 0x3e, 0x35, 0x3b, 0x48, 0x41, 0x40, 0x3c, 0x42, 0x34, 0x41, 0x3f, 0x44, + 0x34, 0x39, 0x33, 0x39, 0x39, 0x47, 0x40, 0x48, 0x38, 0x3a, 0x43, 0x43, + 0x48, 0x3a, 0x3f, 0x46, 0x35, 0x3a, 0x33, 0x36, 0x32, 0x3c, 0x40, 0x34, + 0x40, 0x3a, 0x42, 0x3a, 0x39, 0x38, 0x41, 0x35, 0x3a, 0x3f, 0x35, 0x40, + 0x3f, 0x39, 0x39, 0x36, 0x38, 0x40, 0x3e, 0x3e, 0x3a, 0x31, 0x32, 0x44, + 0x40, 0x47, 0x3a, 0x3c, 0x43, 0x43, 0x46, 0x48, 0x40, 
0x35, 0x3d, 0x37, + 0x44, 0x37, 0x33, 0x44, 0x3b, 0x3e, 0x3f, 0x37, 0x36, 0x3a, 0x38, 0x47, + 0x3a, 0x44, 0x36, 0x42, 0x3e, 0x44, 0x34, 0x46, 0x33, 0x43, 0x44, 0x3e, + 0x30, 0x48, 0x37, 0x38, 0x33, 0x3c, 0x46, 0x42, 0x38, 0x3d, 0x50, 0x39, + 0x33, 0x38, 0x3e, 0x40, 0x3b, 0x2b, 0x3b, 0x5f, 0x2b, 0x32, 0x2f, 0x37, + 0x3f, 0x3a, 0x40, 0x4e, 0x34, 0x38, 0x47, 0x37, 0x27, 0x2b, 0x1b, 0x4f, + 0x36, 0x38, 0x3a, 0x3a, 0x3b, 0x38, 0x2e, 0x3f, 0x3f, 0x42, 0x42, 0x42, + 0x36, 0x3e, 0x3c, 0x55, 0x39, 0x40, 0x44, 0x43, 0x3e, 0x33, 0x3c, 0x43, + 0x38, 0x44, 0x3b, 0x46, 0x3f, 0x45, 0x34, 0x38, 0x3c, 0x41, 0x42, 0x3d, + 0x42, 0x36, 0x43, 0x3f, 0x3c, 0x39, 0x3e, 0x39, 0x39, 0x42, 0x33, 0x47, + 0x36, 0x3d, 0x3f, 0x3b, 0x40, 0x39, 0x3b, 0x49, 0x36, 0x40, 0x3d, 0x41, + 0x40, 0x34, 0x3b, 0x4e, 0x3b, 0x36, 0x3b, 0x45, 0x40, 0x32, 0x3b, 0x49, + 0x37, 0x38, 0x3a, 0x47, 0x37, 0x40, 0x3e, 0x38, 0x40, 0x3f, 0x3c, 0x3a, + 0x47, 0x41, 0x42, 0x30, 0x40, 0x3c, 0x42, 0x3f, 0x31, 0x44, 0x39, 0x38, + 0x3b, 0x38, 0x42, 0x43, 0x41, 0x35, 0x3a, 0x39, 0x3e, 0x38, 0x39, 0x3e, + 0x3c, 0x42, 0x3d, 0x49, 0x47, 0x3c, 0x3f, 0x35, 0x41, 0x3a, 0x36, 0x43, + 0x43, 0x3b, 0x39, 0x3b, 0x36, 0x43, 0x43, 0x4e, 0x3e, 0x35, 0x37, 0x3b, + 0x3f, 0x37, 0x41, 0x48, 0x32, 0x44, 0x43, 0x32, 0x38, 0x39, 0x45, 0x39, + 0x3e, 0x3d, 0x35, 0x39, 0x35, 0x39, 0x50, 0x37, 0x39, 0x40, 0x43, 0x47, + 0x32, 0x2a, 0x40, 0x62, 0x24, 0x30, 0x36, 0x3e, 0x41, 0x32, 0x47, 0x58, + 0x39, 0x36, 0x44, 0x34, 0x26, 0x34, 0x1e, 0x50, 0x3c, 0x3b, 0x3f, 0x42, + 0x35, 0x3d, 0x2a, 0x4e, 0x40, 0x38, 0x36, 0x31, 0x3a, 0x30, 0x37, 0x4b, + 0x3c, 0x3b, 0x3b, 0x41, 0x3b, 0x3c, 0x2e, 0x45, 0x44, 0x3f, 0x3b, 0x35, + 0x3e, 0x33, 0x37, 0x3d, 0x40, 0x39, 0x39, 0x37, 0x40, 0x3e, 0x3a, 0x3e, + 0x3c, 0x3c, 0x45, 0x40, 0x3c, 0x3f, 0x3a, 0x51, 0x47, 0x3a, 0x34, 0x39, + 0x3b, 0x34, 0x44, 0x4c, 0x36, 0x3d, 0x3a, 0x35, 0x34, 0x36, 0x38, 0x4b, + 0x3f, 0x40, 0x3f, 0x3e, 0x40, 0x41, 0x47, 0x43, 0x32, 0x38, 0x46, 0x44, + 0x46, 0x43, 0x43, 0x37, 0x39, 0x49, 0x37, 0x36, 0x3e, 
0x3d, 0x37, 0x3c, + 0x39, 0x37, 0x34, 0x43, 0x45, 0x32, 0x3a, 0x3a, 0x38, 0x43, 0x3b, 0x40, + 0x3b, 0x3f, 0x3d, 0x41, 0x40, 0x3d, 0x3a, 0x3b, 0x48, 0x37, 0x3d, 0x41, + 0x40, 0x3e, 0x38, 0x41, 0x3d, 0x3a, 0x38, 0x49, 0x40, 0x3c, 0x42, 0x41, + 0x3a, 0x38, 0x38, 0x4c, 0x3e, 0x41, 0x40, 0x3b, 0x3d, 0x3e, 0x3c, 0x46, + 0x3e, 0x42, 0x41, 0x38, 0x42, 0x42, 0x41, 0x3e, 0x3e, 0x37, 0x3c, 0x43, + 0x43, 0x3b, 0x54, 0x2b, 0x45, 0x3b, 0x43, 0x41, 0x41, 0x26, 0x3f, 0x60, + 0x25, 0x2b, 0x2e, 0x3a, 0x40, 0x31, 0x40, 0x49, 0x40, 0x31, 0x46, 0x3c, + 0x1e, 0x2a, 0x1a, 0x47, 0x33, 0x37, 0x37, 0x34, 0x31, 0x36, 0x25, 0x41, + 0x2e, 0x36, 0x35, 0x33, 0x33, 0x34, 0x31, 0x45, 0x3a, 0x3f, 0x3d, 0x40, + 0x3c, 0x41, 0x30, 0x3c, 0x3f, 0x46, 0x37, 0x3c, 0x3a, 0x3c, 0x36, 0x3a, + 0x47, 0x3d, 0x31, 0x3f, 0x40, 0x3e, 0x36, 0x44, 0x41, 0x3d, 0x36, 0x3f, + 0x37, 0x3f, 0x34, 0x4b, 0x31, 0x47, 0x43, 0x3e, 0x3e, 0x3a, 0x3b, 0x4b, + 0x37, 0x32, 0x38, 0x3d, 0x37, 0x47, 0x46, 0x4d, 0x36, 0x3c, 0x3f, 0x3a, + 0x41, 0x31, 0x47, 0x43, 0x3d, 0x3d, 0x3e, 0x35, 0x3d, 0x46, 0x49, 0x2a, + 0x37, 0x3c, 0x39, 0x3d, 0x47, 0x3c, 0x34, 0x2c, 0x3e, 0x38, 0x47, 0x32, + 0x36, 0x36, 0x41, 0x38, 0x35, 0x44, 0x48, 0x3b, 0x39, 0x3e, 0x38, 0x3e, + 0x40, 0x36, 0x37, 0x46, 0x39, 0x3b, 0x34, 0x45, 0x40, 0x3b, 0x48, 0x36, + 0x34, 0x44, 0x37, 0x46, 0x3f, 0x42, 0x33, 0x36, 0x43, 0x3c, 0x41, 0x46, + 0x31, 0x42, 0x43, 0x44, 0x44, 0x3e, 0x42, 0x3b, 0x3b, 0x3a, 0x3c, 0x37, + 0x42, 0x41, 0x46, 0x38, 0x41, 0x3b, 0x40, 0x44, 0x37, 0x3c, 0x4c, 0x2e, + 0x3a, 0x3e, 0x3b, 0x36, 0x33, 0x27, 0x37, 0x5d, 0x27, 0x34, 0x32, 0x41, + 0x41, 0x3f, 0x40, 0x5d, 0x40, 0x3d, 0x48, 0x39, 0x2e, 0x30, 0x1f, 0x3f, + 0x38, 0x3f, 0x40, 0x33, 0x40, 0x38, 0x31, 0x3f, 0x42, 0x3e, 0x3b, 0x3a, + 0x42, 0x36, 0x3a, 0x42, 0x3c, 0x3b, 0x3d, 0x41, 0x3d, 0x40, 0x40, 0x3e, + 0x36, 0x41, 0x47, 0x3d, 0x33, 0x32, 0x33, 0x44, 0x3e, 0x3a, 0x3e, 0x3d, + 0x45, 0x3f, 0x38, 0x3f, 0x40, 0x3a, 0x3c, 0x46, 0x32, 0x42, 0x3c, 0x51, + 0x33, 0x38, 0x3a, 0x38, 0x41, 0x34, 0x45, 0x4e, 0x35, 
0x3c, 0x42, 0x3e, + 0x3f, 0x45, 0x44, 0x4e, 0x39, 0x47, 0x3a, 0x33, 0x3e, 0x3b, 0x45, 0x42, + 0x37, 0x3a, 0x3e, 0x33, 0x41, 0x48, 0x32, 0x2a, 0x3b, 0x37, 0x3f, 0x3d, + 0x3a, 0x42, 0x41, 0x2f, 0x34, 0x3e, 0x49, 0x3b, 0x38, 0x3e, 0x3d, 0x3a, + 0x37, 0x3c, 0x44, 0x41, 0x39, 0x42, 0x3f, 0x39, 0x40, 0x35, 0x3d, 0x41, + 0x3b, 0x45, 0x44, 0x48, 0x3d, 0x42, 0x36, 0x33, 0x3e, 0x44, 0x3f, 0x41, + 0x42, 0x40, 0x49, 0x34, 0x48, 0x41, 0x3f, 0x40, 0x3c, 0x45, 0x47, 0x34, + 0x41, 0x37, 0x47, 0x3e, 0x41, 0x41, 0x39, 0x42, 0x3f, 0x3a, 0x46, 0x33, + 0x39, 0x41, 0x38, 0x38, 0x3e, 0x42, 0x41, 0x38, 0x35, 0x32, 0x33, 0x38, + 0x3a, 0x3f, 0x45, 0x66, 0x33, 0x47, 0x38, 0x3c, 0x41, 0x2f, 0x48, 0x55, + 0x33, 0x3e, 0x49, 0x3b, 0x3c, 0x30, 0x24, 0x45, 0x3c, 0x44, 0x43, 0x32, + 0x3d, 0x3f, 0x35, 0x3b, 0x3e, 0x36, 0x38, 0x3a, 0x36, 0x37, 0x3b, 0x41, + 0x38, 0x42, 0x3e, 0x43, 0x39, 0x3f, 0x3c, 0x40, 0x37, 0x43, 0x3e, 0x3b, + 0x3d, 0x35, 0x35, 0x3d, 0x43, 0x3f, 0x3a, 0x35, 0x37, 0x3c, 0x31, 0x47, + 0x44, 0x45, 0x40, 0x32, 0x44, 0x36, 0x38, 0x51, 0x3c, 0x41, 0x45, 0x37, + 0x39, 0x44, 0x3e, 0x4f, 0x3c, 0x3a, 0x38, 0x40, 0x3f, 0x34, 0x39, 0x4e, + 0x3d, 0x39, 0x45, 0x3f, 0x3e, 0x3c, 0x3b, 0x42, 0x3b, 0x3b, 0x34, 0x3d, + 0x41, 0x44, 0x39, 0x2e, 0x37, 0x44, 0x45, 0x37, 0x3d, 0x41, 0x3f, 0x33, + 0x3f, 0x3e, 0x3e, 0x40, 0x44, 0x3f, 0x37, 0x32, 0x35, 0x3e, 0x43, 0x41, + 0x39, 0x37, 0x35, 0x3f, 0x48, 0x3d, 0x43, 0x49, 0x38, 0x35, 0x3f, 0x48, + 0x3b, 0x3a, 0x34, 0x3f, 0x3c, 0x44, 0x3a, 0x40, 0x36, 0x35, 0x44, 0x36, + 0x44, 0x3b, 0x3d, 0x38, 0x3c, 0x44, 0x47, 0x3a, 0x3b, 0x45, 0x41, 0x3a, + 0x39, 0x35, 0x44, 0x3a, 0x49, 0x36, 0x48, 0x31, 0x42, 0x43, 0x42, 0x34, + 0x41, 0x40, 0x4d, 0x36, 0x3e, 0x35, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x39, + 0x3c, 0x44, 0x3f, 0x39, 0x3a, 0x36, 0x3d, 0x36, 0x3a, 0x3a, 0x34, 0x3b, + 0x38, 0x2f, 0x40, 0x34, 0x32, 0x4d, 0x43, 0x45, 0x4e, 0x3f, 0x48, 0x35, + 0x3b, 0x4d, 0x4f, 0x39, 0x42, 0x36, 0x46, 0x36, 0x4a, 0x3c, 0x37, 0x41, + 0x40, 0x43, 0x50, 0x36, 0x3e, 0x39, 0x44, 0x40, 0x36, 
0x47, 0x3f, 0x36, + 0x45, 0x40, 0x45, 0x41, 0x3b, 0x37, 0x41, 0x39, 0x3b, 0x48, 0x37, 0x34, + 0x41, 0x45, 0x49, 0x3f, 0x39, 0x49, 0x3f, 0x3a, 0x42, 0x34, 0x38, 0x37, + 0x44, 0x34, 0x3c, 0x3d, 0x40, 0x47, 0x3a, 0x36, 0x3f, 0x3c, 0x41, 0x3e, + 0x47, 0x46, 0x46, 0x43, 0x3f, 0x38, 0x3b, 0x40, 0x3f, 0x48, 0x3b, 0x4c, + 0x3d, 0x4b, 0x34, 0x3b, 0x44, 0x43, 0x3c, 0x49, 0x38, 0x42, 0x41, 0x36, + 0x33, 0x36, 0x40, 0x46, 0x40, 0x3a, 0x42, 0x3c, 0x3d, 0x35, 0x3c, 0x52, + 0x3e, 0x40, 0x43, 0x43, 0x41, 0x3b, 0x3e, 0x44, 0x3f, 0x40, 0x40, 0x43, + 0x3d, 0x3f, 0x36, 0x42, 0x3f, 0x3c, 0x34, 0x3d, 0x33, 0x41, 0x3c, 0x39, + 0x34, 0x43, 0x3f, 0x34, 0x3c, 0x3a, 0x3a, 0x37, 0x42, 0x41, 0x40, 0x3e, + 0x3d, 0x3c, 0x41, 0x3c, 0x38, 0x33, 0x49, 0x46, 0x40, 0x40, 0x3a, 0x46, + 0x38, 0x3c, 0x37, 0x34, 0x3e, 0x3d, 0x32, 0x38, 0x3c, 0x4c, 0x3a, 0x34, + 0x35, 0x32, 0x39, 0x40, 0x3a, 0x58, 0x40, 0x46, 0x42, 0x33, 0x45, 0x39, + 0x34, 0x4f, 0x53, 0x45, 0x43, 0x3e, 0x41, 0x36, 0x3e, 0x3f, 0x40, 0x47, + 0x4e, 0x3d, 0x53, 0x2b, 0x41, 0x36, 0x3e, 0x38, 0x47, 0x41, 0x3f, 0x34, + 0x47, 0x40, 0x38, 0x39, 0x3d, 0x42, 0x3f, 0x3c, 0x48, 0x3a, 0x35, 0x3c, + 0x45, 0x49, 0x3c, 0x33, 0x33, 0x3f, 0x3c, 0x46, 0x43, 0x3f, 0x45, 0x31, + 0x35, 0x43, 0x46, 0x3a, 0x45, 0x3c, 0x37, 0x3a, 0x37, 0x36, 0x35, 0x3f, + 0x38, 0x49, 0x34, 0x3f, 0x3c, 0x42, 0x49, 0x3e, 0x3e, 0x3c, 0x39, 0x49, + 0x3e, 0x3c, 0x3b, 0x43, 0x44, 0x45, 0x39, 0x4b, 0x47, 0x47, 0x3e, 0x33, + 0x3c, 0x31, 0x34, 0x4f, 0x45, 0x43, 0x40, 0x3d, 0x42, 0x3b, 0x43, 0x50, + 0x3c, 0x3b, 0x37, 0x42, 0x47, 0x42, 0x3e, 0x4a, 0x3f, 0x3a, 0x48, 0x3d, + 0x48, 0x45, 0x3e, 0x40, 0x3a, 0x3c, 0x3d, 0x39, 0x41, 0x42, 0x3c, 0x42, + 0x43, 0x3c, 0x3b, 0x3d, 0x47, 0x49, 0x38, 0x3c, 0x46, 0x3a, 0x3c, 0x3f, + 0x3a, 0x46, 0x3a, 0x3b, 0x3d, 0x3a, 0x49, 0x46, 0x38, 0x40, 0x3e, 0x38, + 0x37, 0x32, 0x40, 0x3c, 0x42, 0x3d, 0x3b, 0x40, 0x3a, 0x38, 0x49, 0x33, + 0x40, 0x38, 0x2b, 0x3a, 0x3c, 0x4f, 0x4d, 0x3e, 0x35, 0x3d, 0x3b, 0x40, + 0x3a, 0x54, 0x3e, 0x3e, 0x43, 0x30, 0x47, 0x3d, 0x3b, 
0x53, 0x52, 0x4a, + 0x43, 0x41, 0x49, 0x37, 0x3b, 0x35, 0x44, 0x3c, 0x45, 0x40, 0x4f, 0x36, + 0x4b, 0x42, 0x41, 0x3a, 0x41, 0x44, 0x47, 0x32, 0x43, 0x35, 0x3f, 0x37, + 0x43, 0x41, 0x43, 0x36, 0x3f, 0x3b, 0x3d, 0x38, 0x3d, 0x40, 0x42, 0x36, + 0x44, 0x3a, 0x39, 0x47, 0x37, 0x34, 0x42, 0x3a, 0x37, 0x38, 0x37, 0x3f, + 0x36, 0x3b, 0x45, 0x3f, 0x3f, 0x3d, 0x39, 0x3d, 0x39, 0x41, 0x37, 0x3f, + 0x3f, 0x3d, 0x3f, 0x41, 0x43, 0x41, 0x45, 0x43, 0x41, 0x3c, 0x3e, 0x40, + 0x40, 0x39, 0x41, 0x4f, 0x47, 0x42, 0x46, 0x48, 0x3b, 0x3b, 0x3c, 0x46, + 0x47, 0x3e, 0x46, 0x37, 0x38, 0x3d, 0x38, 0x52, 0x36, 0x46, 0x3c, 0x3a, + 0x3b, 0x37, 0x48, 0x4b, 0x3f, 0x42, 0x3c, 0x36, 0x40, 0x37, 0x33, 0x4c, + 0x39, 0x34, 0x41, 0x34, 0x3f, 0x3b, 0x35, 0x4b, 0x3b, 0x45, 0x43, 0x31, + 0x3e, 0x39, 0x30, 0x3d, 0x32, 0x43, 0x44, 0x3c, 0x3e, 0x38, 0x43, 0x41, + 0x3e, 0x37, 0x41, 0x39, 0x39, 0x44, 0x43, 0x38, 0x3f, 0x37, 0x48, 0x3f, + 0x3b, 0x44, 0x37, 0x3f, 0x3a, 0x3f, 0x3b, 0x33, 0x42, 0x3e, 0x2f, 0x42, + 0x44, 0x4f, 0x52, 0x3c, 0x34, 0x33, 0x39, 0x46, 0x31, 0x55, 0x43, 0x4e, + 0x49, 0x38, 0x4d, 0x48, 0x34, 0x4d, 0x5c, 0x4d, 0x49, 0x37, 0x4f, 0x40, + 0x3c, 0x3d, 0x41, 0x42, 0x3f, 0x51, 0x4b, 0x2f, 0x46, 0x35, 0x39, 0x3c, + 0x49, 0x3d, 0x4e, 0x32, 0x43, 0x47, 0x31, 0x3e, 0x42, 0x4a, 0x4c, 0x39, + 0x43, 0x46, 0x3e, 0x3f, 0x44, 0x3c, 0x42, 0x30, 0x3e, 0x34, 0x3b, 0x3b, + 0x3a, 0x3c, 0x42, 0x3d, 0x3d, 0x48, 0x48, 0x36, 0x3a, 0x45, 0x38, 0x40, + 0x3c, 0x41, 0x3f, 0x49, 0x42, 0x41, 0x38, 0x3d, 0x3d, 0x44, 0x3b, 0x3d, + 0x35, 0x48, 0x43, 0x3b, 0x32, 0x41, 0x3e, 0x3a, 0x46, 0x41, 0x40, 0x54, + 0x38, 0x3f, 0x3c, 0x36, 0x3b, 0x36, 0x43, 0x50, 0x38, 0x3c, 0x44, 0x3b, + 0x43, 0x47, 0x32, 0x50, 0x3d, 0x46, 0x3d, 0x3b, 0x39, 0x37, 0x3b, 0x4a, + 0x47, 0x43, 0x46, 0x3d, 0x3d, 0x41, 0x43, 0x45, 0x3b, 0x3c, 0x39, 0x47, + 0x43, 0x42, 0x39, 0x4c, 0x34, 0x41, 0x45, 0x3b, 0x38, 0x3e, 0x37, 0x3f, + 0x45, 0x43, 0x39, 0x42, 0x3c, 0x3d, 0x3d, 0x3c, 0x48, 0x39, 0x3b, 0x3a, + 0x46, 0x45, 0x3d, 0x3a, 0x3f, 0x3a, 0x45, 0x36, 0x3d, 
0x43, 0x36, 0x43, + 0x42, 0x3d, 0x41, 0x3f, 0x3a, 0x3f, 0x31, 0x37, 0x48, 0x4f, 0x4e, 0x36, + 0x30, 0x3a, 0x3e, 0x3e, 0x38, 0x57, 0x40, 0x47, 0x47, 0x38, 0x4f, 0x46, + 0x3d, 0x4a, 0x50, 0x4c, 0x42, 0x3b, 0x4d, 0x3d, 0x3d, 0x33, 0x40, 0x41, + 0x48, 0x4b, 0x46, 0x39, 0x4d, 0x30, 0x45, 0x38, 0x48, 0x3c, 0x48, 0x3b, + 0x4d, 0x40, 0x3b, 0x40, 0x46, 0x41, 0x51, 0x34, 0x40, 0x43, 0x3f, 0x42, + 0x45, 0x42, 0x3e, 0x35, 0x3d, 0x38, 0x37, 0x3a, 0x42, 0x40, 0x43, 0x3c, + 0x3c, 0x3d, 0x43, 0x40, 0x45, 0x3a, 0x3e, 0x3a, 0x3e, 0x40, 0x43, 0x35, + 0x37, 0x3f, 0x3f, 0x3e, 0x39, 0x3f, 0x47, 0x38, 0x3e, 0x44, 0x3b, 0x3c, + 0x3b, 0x32, 0x40, 0x3e, 0x42, 0x45, 0x3a, 0x52, 0x3a, 0x3e, 0x45, 0x40, + 0x41, 0x48, 0x3f, 0x4e, 0x3e, 0x42, 0x3d, 0x39, 0x3a, 0x33, 0x3f, 0x4b, + 0x3e, 0x38, 0x36, 0x3e, 0x31, 0x41, 0x3a, 0x40, 0x3b, 0x37, 0x3f, 0x3e, + 0x3e, 0x3f, 0x35, 0x44, 0x3d, 0x42, 0x3d, 0x44, 0x42, 0x3f, 0x3e, 0x44, + 0x3e, 0x45, 0x37, 0x3a, 0x3b, 0x42, 0x3f, 0x41, 0x3b, 0x3f, 0x41, 0x41, + 0x3e, 0x34, 0x47, 0x39, 0x46, 0x46, 0x37, 0x39, 0x3f, 0x45, 0x39, 0x39, + 0x3a, 0x40, 0x38, 0x3a, 0x31, 0x34, 0x3a, 0x41, 0x38, 0x41, 0x3a, 0x41, + 0x44, 0x37, 0x2d, 0x41, 0x43, 0x4d, 0x4b, 0x3b, 0x2c, 0x30, 0x42, 0x3b, + 0x31, 0x56, 0x43, 0x47, 0x47, 0x38, 0x50, 0x44, 0x40, 0x52, 0x5a, 0x50, + 0x44, 0x3f, 0x4b, 0x35, 0x3a, 0x36, 0x41, 0x44, 0x47, 0x4e, 0x52, 0x36, + 0x45, 0x39, 0x38, 0x3c, 0x42, 0x44, 0x40, 0x3b, 0x4b, 0x38, 0x35, 0x35, + 0x3f, 0x40, 0x4f, 0x39, 0x3d, 0x37, 0x34, 0x3e, 0x41, 0x4c, 0x40, 0x37, + 0x3d, 0x3b, 0x37, 0x37, 0x40, 0x42, 0x35, 0x39, 0x41, 0x42, 0x3d, 0x34, + 0x3c, 0x37, 0x3a, 0x3d, 0x46, 0x46, 0x46, 0x3f, 0x44, 0x3d, 0x3c, 0x40, + 0x3c, 0x3a, 0x3d, 0x3b, 0x3b, 0x41, 0x47, 0x3a, 0x43, 0x43, 0x43, 0x3b, + 0x3e, 0x3e, 0x42, 0x46, 0x36, 0x37, 0x45, 0x35, 0x3c, 0x3b, 0x31, 0x4b, + 0x3c, 0x3e, 0x3a, 0x3a, 0x42, 0x42, 0x34, 0x47, 0x37, 0x34, 0x41, 0x3d, + 0x3e, 0x39, 0x43, 0x47, 0x31, 0x3b, 0x40, 0x3b, 0x42, 0x3d, 0x44, 0x44, + 0x37, 0x39, 0x44, 0x3b, 0x40, 0x3a, 0x3d, 0x44, 0x3c, 
0x40, 0x42, 0x3b, + 0x40, 0x3e, 0x32, 0x3d, 0x3c, 0x3e, 0x44, 0x3e, 0x47, 0x3d, 0x3f, 0x2e, + 0x3e, 0x3d, 0x3f, 0x3b, 0x3b, 0x43, 0x43, 0x3c, 0x3a, 0x3c, 0x3a, 0x36, + 0x38, 0x46, 0x30, 0x3e, 0x3f, 0x35, 0x3e, 0x34, 0x3c, 0x34, 0x32, 0x4a, + 0x41, 0x48, 0x48, 0x3f, 0x34, 0x37, 0x42, 0x43, 0x36, 0x59, 0x42, 0x3f, + 0x4b, 0x3d, 0x5d, 0x45, 0x3b, 0x51, 0x51, 0x4c, 0x41, 0x40, 0x4d, 0x36, + 0x3f, 0x34, 0x39, 0x3d, 0x4a, 0x4b, 0x4f, 0x33, 0x48, 0x32, 0x3c, 0x32, + 0x48, 0x4c, 0x4d, 0x3a, 0x49, 0x3a, 0x3a, 0x2e, 0x4b, 0x44, 0x4f, 0x33, + 0x3a, 0x48, 0x34, 0x43, 0x38, 0x45, 0x44, 0x35, 0x3b, 0x3f, 0x40, 0x37, + 0x35, 0x34, 0x38, 0x3e, 0x41, 0x3e, 0x3b, 0x47, 0x41, 0x47, 0x3c, 0x3c, + 0x39, 0x40, 0x3e, 0x45, 0x36, 0x41, 0x3f, 0x3f, 0x3c, 0x44, 0x3f, 0x43, + 0x3d, 0x3c, 0x49, 0x42, 0x3e, 0x3f, 0x48, 0x37, 0x43, 0x37, 0x43, 0x3d, + 0x32, 0x42, 0x44, 0x39, 0x36, 0x37, 0x40, 0x46, 0x47, 0x3d, 0x3a, 0x42, + 0x3f, 0x38, 0x37, 0x48, 0x39, 0x40, 0x3c, 0x37, 0x33, 0x38, 0x38, 0x40, + 0x41, 0x3c, 0x3f, 0x3b, 0x40, 0x3a, 0x47, 0x46, 0x3a, 0x37, 0x42, 0x47, + 0x3b, 0x3f, 0x3b, 0x40, 0x33, 0x3f, 0x3a, 0x3c, 0x38, 0x3a, 0x36, 0x38, + 0x36, 0x40, 0x48, 0x42, 0x48, 0x3c, 0x43, 0x36, 0x32, 0x3b, 0x34, 0x39, + 0x38, 0x46, 0x37, 0x3b, 0x44, 0x34, 0x36, 0x38, 0x3c, 0x43, 0x33, 0x3c, + 0x3b, 0x45, 0x38, 0x38, 0x44, 0x33, 0x36, 0x4a, 0x46, 0x4c, 0x4a, 0x34, + 0x36, 0x37, 0x43, 0x42, 0x33, 0x58, 0x43, 0x48, 0x44, 0x38, 0x5f, 0x3f, + 0x3c, 0x4d, 0x53, 0x52, 0x43, 0x47, 0x52, 0x3e, 0x3b, 0x2d, 0x3b, 0x3a, + 0x4b, 0x49, 0x53, 0x38, 0x4c, 0x2f, 0x38, 0x31, 0x42, 0x40, 0x48, 0x3f, + 0x44, 0x3c, 0x3c, 0x34, 0x46, 0x3f, 0x49, 0x3a, 0x43, 0x3d, 0x34, 0x42, + 0x36, 0x47, 0x51, 0x3c, 0x3d, 0x39, 0x39, 0x3a, 0x3b, 0x35, 0x35, 0x41, + 0x47, 0x3c, 0x3b, 0x43, 0x3f, 0x45, 0x3e, 0x40, 0x3c, 0x3f, 0x3c, 0x42, + 0x3b, 0x3e, 0x38, 0x3f, 0x3f, 0x41, 0x39, 0x39, 0x3d, 0x43, 0x4f, 0x3d, + 0x48, 0x3b, 0x44, 0x45, 0x3d, 0x3b, 0x49, 0x43, 0x44, 0x3d, 0x37, 0x3b, + 0x3c, 0x45, 0x46, 0x44, 0x35, 0x3e, 0x32, 0x35, 0x34, 
0x3b, 0x40, 0x43, + 0x3e, 0x45, 0x37, 0x3d, 0x3f, 0x43, 0x36, 0x3f, 0x3f, 0x43, 0x39, 0x44, + 0x3e, 0x3e, 0x45, 0x40, 0x3e, 0x44, 0x3b, 0x3e, 0x42, 0x42, 0x3b, 0x3d, + 0x3a, 0x40, 0x39, 0x3a, 0x32, 0x36, 0x41, 0x30, 0x39, 0x46, 0x33, 0x3f, + 0x46, 0x40, 0x3c, 0x31, 0x41, 0x3a, 0x3f, 0x3f, 0x3b, 0x36, 0x3f, 0x38, + 0x36, 0x3e, 0x35, 0x35, 0x3b, 0x3d, 0x3f, 0x39, 0x46, 0x37, 0x3a, 0x47, + 0x37, 0x39, 0x2c, 0x55, 0x40, 0x4b, 0x4a, 0x39, 0x35, 0x42, 0x3d, 0x40, + 0x3a, 0x54, 0x41, 0x48, 0x51, 0x3b, 0x61, 0x3e, 0x3e, 0x4d, 0x51, 0x52, + 0x3e, 0x43, 0x52, 0x41, 0x48, 0x2d, 0x35, 0x35, 0x4b, 0x44, 0x4d, 0x3c, + 0x54, 0x33, 0x39, 0x27, 0x4a, 0x44, 0x4a, 0x41, 0x3c, 0x3a, 0x31, 0x2f, + 0x3d, 0x42, 0x48, 0x3f, 0x42, 0x40, 0x44, 0x3b, 0x40, 0x3e, 0x49, 0x3a, + 0x3c, 0x35, 0x30, 0x3e, 0x3e, 0x3d, 0x36, 0x3a, 0x3e, 0x3a, 0x4a, 0x3e, + 0x3d, 0x49, 0x40, 0x43, 0x3e, 0x45, 0x3f, 0x3c, 0x3b, 0x42, 0x3a, 0x39, + 0x3b, 0x47, 0x3f, 0x39, 0x49, 0x46, 0x3d, 0x34, 0x32, 0x44, 0x46, 0x42, + 0x47, 0x39, 0x49, 0x48, 0x3b, 0x38, 0x45, 0x45, 0x37, 0x38, 0x46, 0x46, + 0x37, 0x42, 0x35, 0x34, 0x45, 0x42, 0x35, 0x43, 0x3b, 0x3a, 0x43, 0x43, + 0x40, 0x42, 0x35, 0x3f, 0x38, 0x3f, 0x3a, 0x3a, 0x3b, 0x3f, 0x3e, 0x36, + 0x3f, 0x3c, 0x48, 0x3b, 0x3a, 0x41, 0x41, 0x35, 0x33, 0x3f, 0x3b, 0x45, + 0x48, 0x36, 0x40, 0x38, 0x47, 0x3d, 0x35, 0x40, 0x41, 0x42, 0x41, 0x37, + 0x41, 0x3e, 0x36, 0x48, 0x3e, 0x3c, 0x32, 0x39, 0x41, 0x40, 0x38, 0x3f, + 0x46, 0x43, 0x33, 0x40, 0x43, 0x43, 0x3a, 0x49, 0x3f, 0x35, 0x2c, 0x5d, + 0x43, 0x49, 0x52, 0x3b, 0x3c, 0x41, 0x40, 0x4a, 0x33, 0x50, 0x41, 0x46, + 0x52, 0x41, 0x68, 0x48, 0x44, 0x53, 0x54, 0x55, 0x42, 0x42, 0x57, 0x44, + 0x47, 0x35, 0x35, 0x3e, 0x4b, 0x44, 0x4e, 0x38, 0x55, 0x2f, 0x36, 0x2d, + 0x40, 0x48, 0x4b, 0x41, 0x48, 0x36, 0x32, 0x32, 0x44, 0x42, 0x47, 0x42, + 0x48, 0x3d, 0x3d, 0x39, 0x3e, 0x35, 0x4b, 0x39, 0x38, 0x3a, 0x39, 0x46, + 0x38, 0x3f, 0x3a, 0x42, 0x4b, 0x45, 0x3e, 0x32, 0x46, 0x43, 0x3b, 0x40, + 0x45, 0x41, 0x3e, 0x43, 0x37, 0x3d, 0x43, 0x3b, 0x46, 
0x48, 0x42, 0x3b, + 0x3d, 0x48, 0x4a, 0x3c, 0x3b, 0x42, 0x40, 0x3c, 0x3a, 0x42, 0x38, 0x47, + 0x3b, 0x3b, 0x3d, 0x41, 0x3f, 0x38, 0x3f, 0x4a, 0x44, 0x3f, 0x47, 0x3a, + 0x47, 0x44, 0x43, 0x43, 0x34, 0x3d, 0x3a, 0x3c, 0x47, 0x3f, 0x3e, 0x39, + 0x42, 0x4a, 0x40, 0x36, 0x40, 0x41, 0x42, 0x3f, 0x3f, 0x43, 0x39, 0x38, + 0x3c, 0x3b, 0x4c, 0x2f, 0x41, 0x39, 0x40, 0x42, 0x3f, 0x42, 0x40, 0x36, + 0x3b, 0x45, 0x41, 0x41, 0x44, 0x45, 0x42, 0x37, 0x3d, 0x3a, 0x33, 0x3e, + 0x3b, 0x3b, 0x3c, 0x3d, 0x38, 0x49, 0x44, 0x39, 0x3f, 0x48, 0x3d, 0x41, + 0x42, 0x43, 0x44, 0x3e, 0x41, 0x3d, 0x32, 0x59, 0x45, 0x4b, 0x4b, 0x38, + 0x37, 0x3d, 0x48, 0x42, 0x3d, 0x52, 0x43, 0x46, 0x54, 0x48, 0x67, 0x4d, + 0x45, 0x4e, 0x49, 0x52, 0x45, 0x45, 0x58, 0x3b, 0x41, 0x38, 0x3f, 0x3f, + 0x49, 0x44, 0x4f, 0x48, 0x57, 0x31, 0x3c, 0x2a, 0x3e, 0x4c, 0x41, 0x40, + 0x47, 0x3f, 0x33, 0x34, 0x3f, 0x42, 0x48, 0x43, 0x4b, 0x38, 0x39, 0x3d, + 0x3f, 0x3e, 0x4b, 0x3f, 0x35, 0x36, 0x3c, 0x46, 0x3c, 0x45, 0x37, 0x3b, + 0x3c, 0x39, 0x41, 0x40, 0x41, 0x43, 0x44, 0x41, 0x45, 0x4f, 0x44, 0x43, + 0x44, 0x3c, 0x45, 0x34, 0x42, 0x45, 0x3f, 0x46, 0x3f, 0x43, 0x3d, 0x3a, + 0x39, 0x47, 0x45, 0x3d, 0x3f, 0x3b, 0x3d, 0x42, 0x38, 0x48, 0x48, 0x3b, + 0x3c, 0x3a, 0x3f, 0x41, 0x44, 0x4b, 0x44, 0x48, 0x41, 0x3c, 0x3d, 0x3c, + 0x3e, 0x3a, 0x4a, 0x3b, 0x49, 0x35, 0x3a, 0x3d, 0x41, 0x3f, 0x49, 0x39, + 0x44, 0x37, 0x3f, 0x3c, 0x42, 0x40, 0x4a, 0x46, 0x39, 0x38, 0x46, 0x37, + 0x41, 0x46, 0x41, 0x45, 0x40, 0x3b, 0x3b, 0x33, 0x3b, 0x39, 0x3c, 0x43, + 0x37, 0x3c, 0x44, 0x3d, 0x46, 0x39, 0x3c, 0x3c, 0x44, 0x48, 0x41, 0x44, + 0x41, 0x43, 0x46, 0x3b, 0x47, 0x41, 0x31, 0x41, 0x44, 0x40, 0x43, 0x42, + 0x3e, 0x43, 0x34, 0x65, 0x4f, 0x50, 0x4d, 0x3a, 0x37, 0x43, 0x4d, 0x4a, + 0x3d, 0x54, 0x40, 0x42, 0x5b, 0x3b, 0x71, 0x49, 0x44, 0x4f, 0x54, 0x56, + 0x48, 0x40, 0x52, 0x41, 0x42, 0x38, 0x3c, 0x49, 0x4a, 0x45, 0x51, 0x35, + 0x54, 0x2f, 0x35, 0x25, 0x4d, 0x3f, 0x4d, 0x43, 0x49, 0x33, 0x32, 0x3a, + 0x46, 0x48, 0x48, 0x3d, 0x43, 0x3a, 0x3c, 0x3a, 0x48, 
0x40, 0x4b, 0x3b, + 0x45, 0x3b, 0x3f, 0x38, 0x37, 0x41, 0x31, 0x3b, 0x41, 0x43, 0x43, 0x37, + 0x48, 0x3f, 0x48, 0x37, 0x40, 0x4a, 0x43, 0x45, 0x3d, 0x39, 0x37, 0x37, + 0x3c, 0x3f, 0x47, 0x48, 0x43, 0x3e, 0x41, 0x3f, 0x3e, 0x38, 0x3e, 0x37, + 0x45, 0x45, 0x35, 0x44, 0x38, 0x3a, 0x49, 0x43, 0x40, 0x41, 0x40, 0x44, + 0x3c, 0x3e, 0x40, 0x38, 0x42, 0x41, 0x3c, 0x41, 0x3a, 0x3b, 0x3c, 0x3a, + 0x49, 0x3c, 0x42, 0x44, 0x3f, 0x39, 0x45, 0x32, 0x45, 0x43, 0x45, 0x39, + 0x43, 0x41, 0x4b, 0x39, 0x32, 0x3c, 0x3c, 0x36, 0x39, 0x3f, 0x46, 0x32, + 0x39, 0x35, 0x4f, 0x32, 0x3e, 0x40, 0x3d, 0x3e, 0x3a, 0x39, 0x4c, 0x38, + 0x43, 0x38, 0x49, 0x3b, 0x33, 0x39, 0x3b, 0x36, 0x36, 0x43, 0x3b, 0x3c, + 0x32, 0x3c, 0x3a, 0x45, 0x31, 0x3d, 0x37, 0x40, 0x3f, 0x3f, 0x35, 0xff, + 0x49, 0x4e, 0x4c, 0x3c, 0x36, 0x43, 0x46, 0x45, 0x41, 0x59, 0x44, 0x4a, + 0x53, 0x44, 0x71, 0x4a, 0x39, 0x4f, 0x50, 0x4b, 0x47, 0x42, 0x5a, 0x3c, + 0x45, 0x38, 0x3e, 0x42, 0x53, 0x43, 0x52, 0x3a, 0x52, 0x34, 0x31, 0x20, + 0x49, 0x4e, 0x46, 0x43, 0x4b, 0x3d, 0x2b, 0x27, 0x46, 0x46, 0x47, 0x41, + 0x42, 0x37, 0x39, 0x38, 0x45, 0x3f, 0x51, 0x3d, 0x48, 0x3f, 0x33, 0x3f, + 0x38, 0x45, 0x31, 0x38, 0x41, 0x3d, 0x47, 0x39, 0x42, 0x40, 0x4c, 0x3f, + 0x40, 0x42, 0x41, 0x41, 0x41, 0x42, 0x39, 0x35, 0x3f, 0x46, 0x45, 0x36, + 0x3f, 0x43, 0x3b, 0x39, 0x41, 0x38, 0x43, 0x37, 0x3d, 0x44, 0x3b, 0x40, + 0x36, 0x3d, 0x42, 0x41, 0x41, 0x3d, 0x38, 0x4a, 0x40, 0x4a, 0x4c, 0x38, + 0x3f, 0x40, 0x45, 0x3c, 0x3f, 0x4b, 0x43, 0x41, 0x43, 0x3e, 0x43, 0x3f, + 0x36, 0x40, 0x40, 0x39, 0x3f, 0x3a, 0x3a, 0x30, 0x41, 0x3c, 0x3c, 0x34, + 0x46, 0x38, 0x43, 0x34, 0x3a, 0x42, 0x43, 0x42, 0x40, 0x41, 0x49, 0x34, + 0x35, 0x40, 0x47, 0x3d, 0x3d, 0x3e, 0x4c, 0x33, 0x3c, 0x3b, 0x39, 0x43, + 0x3a, 0x3e, 0x3b, 0x37, 0x3f, 0x42, 0x31, 0x3d, 0x41, 0x3e, 0x32, 0x47, + 0x34, 0x41, 0x3d, 0x35, 0x39, 0x40, 0x38, 0x69, 0x4f, 0x4a, 0x49, 0x37, + 0x37, 0x44, 0x43, 0x46, 0x40, 0x58, 0x43, 0x48, 0x54, 0x46, 0x6c, 0x50, + 0x3a, 0x50, 0x50, 0x57, 0x47, 0x46, 0x5c, 0x40, 0x40, 
0x39, 0x3e, 0x46, + 0x53, 0x46, 0x5c, 0x36, 0x4f, 0x32, 0x30, 0x2d, 0x4a, 0x48, 0x41, 0x45, + 0x47, 0x2f, 0x32, 0x2b, 0x43, 0x40, 0x43, 0x3c, 0x40, 0x44, 0x3e, 0x37, + 0x39, 0x3e, 0x48, 0x42, 0x45, 0x36, 0x47, 0x3f, 0x3b, 0x41, 0x35, 0x35, + 0x3b, 0x3e, 0x35, 0x43, 0x3e, 0x41, 0x3d, 0x36, 0x41, 0x3c, 0x40, 0x44, + 0x3d, 0x40, 0x35, 0x32, 0x48, 0x3e, 0x39, 0x42, 0x44, 0x3d, 0x39, 0x3b, + 0x3b, 0x45, 0x40, 0x4a, 0x3f, 0x41, 0x43, 0x39, 0x42, 0x44, 0x4c, 0x3c, + 0x3f, 0x3e, 0x3f, 0x43, 0x40, 0x42, 0x4c, 0x3b, 0x3e, 0x3d, 0x49, 0x42, + 0x40, 0x44, 0x40, 0x34, 0x36, 0x40, 0x45, 0x39, 0x42, 0x40, 0x3e, 0x44, + 0x45, 0x37, 0x3c, 0x38, 0x3e, 0x49, 0x3e, 0x3c, 0x41, 0x3d, 0x42, 0x32, + 0x40, 0x45, 0x3e, 0x36, 0x44, 0x3a, 0x4e, 0x38, 0x43, 0x38, 0x40, 0x38, + 0x49, 0x42, 0x40, 0x3d, 0x42, 0x48, 0x48, 0x3d, 0x41, 0x3a, 0x3f, 0x41, + 0x38, 0x3c, 0x44, 0x39, 0x3a, 0x32, 0x3a, 0x3e, 0x3d, 0x3b, 0x39, 0x38, + 0x3a, 0x43, 0x3a, 0x6b, 0x45, 0x50, 0x47, 0x33, 0x38, 0x48, 0x4d, 0x4f, + 0x39, 0x4b, 0x46, 0x4a, 0x4f, 0x42, 0x6f, 0x4b, 0x40, 0x55, 0x54, 0x50, + 0x42, 0x47, 0x5e, 0x46, 0x40, 0x34, 0x40, 0x47, 0x52, 0x46, 0x55, 0x3b, + 0x4f, 0x2b, 0x35, 0x33, 0x4c, 0x44, 0x44, 0x48, 0x47, 0x37, 0x35, 0x27, + 0x4a, 0x3b, 0x41, 0x40, 0x40, 0x3e, 0x36, 0x39, 0x3e, 0x3c, 0x45, 0x3f, + 0x4d, 0x41, 0x3d, 0x48, 0x47, 0x46, 0x33, 0x3d, 0x3d, 0x3e, 0x34, 0x3f, + 0x3e, 0x3a, 0x41, 0x35, 0x3b, 0x3e, 0x42, 0x3c, 0x42, 0x42, 0x40, 0x31, + 0x37, 0x40, 0x36, 0x42, 0x48, 0x39, 0x3d, 0x3c, 0x3a, 0x43, 0x39, 0x3d, + 0x47, 0x49, 0x43, 0x3d, 0x45, 0x39, 0x44, 0x37, 0x3e, 0x4d, 0x3d, 0x40, + 0x3d, 0x4c, 0x4d, 0x44, 0x3c, 0x3d, 0x46, 0x41, 0x41, 0x42, 0x40, 0x40, + 0x41, 0x3a, 0x3c, 0x3b, 0x3c, 0x44, 0x40, 0x34, 0x44, 0x38, 0x3b, 0x33, + 0x45, 0x45, 0x44, 0x3f, 0x3e, 0x3a, 0x3b, 0x3b, 0x43, 0x39, 0x3a, 0x45, + 0x3b, 0x3a, 0x4b, 0x39, 0x3d, 0x38, 0x41, 0x39, 0x42, 0x45, 0x43, 0x40, + 0x3e, 0x35, 0x44, 0x3f, 0x45, 0x41, 0x40, 0x3e, 0x43, 0x42, 0x37, 0x3a, + 0x38, 0x35, 0x3a, 0x48, 0x3e, 0x3b, 0x40, 0x38, 0x3c, 
0x3c, 0x3b, 0x6a, + 0x48, 0x4d, 0x4d, 0x34, 0x38, 0x40, 0x4a, 0x45, 0x3c, 0x4f, 0x41, 0x4b, + 0x58, 0x46, 0x71, 0x49, 0x3d, 0x53, 0x44, 0x52, 0x42, 0x3e, 0x57, 0x4c, + 0x4c, 0x38, 0x40, 0x3b, 0x5c, 0x4c, 0x52, 0x3e, 0x4c, 0x2d, 0x32, 0x37, + 0x49, 0x3f, 0x41, 0x47, 0x4a, 0x3b, 0x2f, 0x26, 0x45, 0x40, 0x47, 0x42, + 0x3d, 0x39, 0x2d, 0x2c, 0x3f, 0x45, 0x46, 0x44, 0x48, 0x43, 0x42, 0x48, + 0x40, 0x41, 0x3b, 0x3b, 0x41, 0x3b, 0x39, 0x40, 0x3b, 0x47, 0x3f, 0x38, + 0x3f, 0x49, 0x3b, 0x35, 0x40, 0x45, 0x38, 0x35, 0x36, 0x34, 0x3e, 0x3d, + 0x46, 0x3e, 0x33, 0x38, 0x43, 0x48, 0x3f, 0x45, 0x31, 0x44, 0x38, 0x35, + 0x3c, 0x41, 0x4b, 0x44, 0x3d, 0x43, 0x38, 0x48, 0x3c, 0x39, 0x4a, 0x42, + 0x3d, 0x43, 0x3f, 0x49, 0x3e, 0x47, 0x49, 0x41, 0x3b, 0x3c, 0x47, 0x3a, + 0x3d, 0x40, 0x4a, 0x38, 0x3d, 0x3b, 0x47, 0x3a, 0x36, 0x47, 0x42, 0x46, + 0x3c, 0x3d, 0x45, 0x3b, 0x48, 0x3f, 0x38, 0x36, 0x39, 0x46, 0x43, 0x3a, + 0x41, 0x3d, 0x39, 0x39, 0x46, 0x37, 0x3f, 0x3f, 0x3a, 0x46, 0x3f, 0x39, + 0x49, 0x44, 0x42, 0x3a, 0x3a, 0x43, 0x3e, 0x42, 0x3d, 0x3d, 0x43, 0x40, + 0x43, 0x3c, 0x3f, 0x43, 0x40, 0x42, 0x3b, 0x57, 0x4a, 0x4f, 0x4a, 0x2d, + 0x3b, 0x48, 0x45, 0x42, 0x34, 0x4c, 0x3e, 0x4f, 0x4d, 0x40, 0x6c, 0x4b, + 0x3b, 0x4d, 0x4c, 0x57, 0x49, 0x3d, 0x5d, 0x44, 0x43, 0x29, 0x42, 0x3f, + 0x5b, 0x47, 0x4f, 0x3e, 0x54, 0x2e, 0x34, 0x34, 0x4b, 0x47, 0x46, 0x46, + 0x4b, 0x34, 0x36, 0x28, 0x3e, 0x3f, 0x42, 0x40, 0x3b, 0x38, 0x39, 0x42, + 0x49, 0x3d, 0x49, 0x47, 0x47, 0x3b, 0x43, 0x34, 0x39, 0x36, 0x42, 0x3d, + 0x37, 0x40, 0x37, 0x38, 0x46, 0x42, 0x49, 0x37, 0x44, 0x3f, 0x38, 0x3e, + 0x36, 0x32, 0x33, 0x38, 0x40, 0x46, 0x42, 0x34, 0x41, 0x42, 0x3e, 0x38, + 0x44, 0x3e, 0x3f, 0x43, 0x3f, 0x43, 0x35, 0x3f, 0x4d, 0x3b, 0x43, 0x39, + 0x40, 0x47, 0x3f, 0x4a, 0x3a, 0x3f, 0x45, 0x45, 0x48, 0x42, 0x3b, 0x47, + 0x42, 0x4b, 0x47, 0x3e, 0x3c, 0x42, 0x46, 0x39, 0x41, 0x3f, 0x48, 0x33, + 0x45, 0x34, 0x3d, 0x30, 0x40, 0x4c, 0x40, 0x40, 0x39, 0x37, 0x40, 0x33, + 0x49, 0x42, 0x45, 0x38, 0x3c, 0x43, 0x45, 0x35, 0x37, 
0x33, 0x34, 0x3b, + 0x3b, 0x38, 0x39, 0x41, 0x42, 0x40, 0x3e, 0x3e, 0x41, 0x33, 0x3a, 0x36, + 0x40, 0x3a, 0x3c, 0x45, 0x43, 0x3c, 0x40, 0x41, 0x49, 0x47, 0x35, 0x34, + 0x3a, 0x3d, 0x3a, 0x68, 0x4f, 0x48, 0x43, 0x36, 0x37, 0x3e, 0x45, 0x49, + 0x3a, 0x4d, 0x41, 0x3d, 0x46, 0x45, 0x65, 0x46, 0x38, 0x4d, 0x4a, 0x53, + 0x43, 0x41, 0x5d, 0x47, 0x41, 0x34, 0x39, 0x43, 0x4e, 0x48, 0x50, 0x38, + 0x53, 0x32, 0x30, 0x2e, 0x49, 0x4c, 0x4d, 0x3f, 0x46, 0x38, 0x34, 0x2b, + 0x44, 0x44, 0x41, 0x41, 0x36, 0x40, 0x3f, 0x32, 0x46, 0x38, 0x50, 0x45, + 0x3f, 0x3d, 0x3b, 0x36, 0x3b, 0x43, 0x3a, 0x34, 0x36, 0x3f, 0x39, 0x35, + 0x3c, 0x40, 0x40, 0x37, 0x3c, 0x39, 0x3d, 0x36, 0x48, 0x3d, 0x43, 0x34, + 0x3b, 0x46, 0x43, 0x41, 0x33, 0x3e, 0x44, 0x3d, 0x44, 0x44, 0x4c, 0x3c, + 0x37, 0x49, 0x42, 0x35, 0x45, 0x3a, 0x3c, 0x41, 0x3a, 0x45, 0x46, 0x41, + 0x3c, 0x48, 0x46, 0x36, 0x36, 0x42, 0x3b, 0x46, 0x42, 0x45, 0x44, 0x47, + 0x3f, 0x44, 0x3a, 0x35, 0x37, 0x46, 0x40, 0x38, 0x40, 0x3d, 0x36, 0x2c, + 0x34, 0x47, 0x40, 0x38, 0x3f, 0x3f, 0x44, 0x2d, 0x3b, 0x3d, 0x3e, 0x44, + 0x3c, 0x40, 0x3e, 0x33, 0x3c, 0x3a, 0x49, 0x40, 0x42, 0x42, 0x3a, 0x3b, + 0x33, 0x3d, 0x3c, 0x43, 0x3e, 0x3d, 0x3a, 0x3a, 0x48, 0x3e, 0x3c, 0x39, + 0x3f, 0x44, 0x37, 0x40, 0x3f, 0x3c, 0x3e, 0x3d, 0x38, 0x42, 0x34, 0x62, + 0x51, 0x47, 0x44, 0x3f, 0x32, 0x3c, 0x3f, 0x46, 0x3d, 0x46, 0x3e, 0x45, + 0x4a, 0x3e, 0x5d, 0x43, 0x45, 0x49, 0x4a, 0x55, 0x41, 0x3c, 0x5a, 0x44, + 0x43, 0x3b, 0x3c, 0x3a, 0x4b, 0x4e, 0x4d, 0x42, 0x49, 0x30, 0x3b, 0x38, + 0x42, 0x44, 0x51, 0x40, 0x48, 0x33, 0x3f, 0x2b, 0x3c, 0x41, 0x3c, 0x45, + 0x35, 0x39, 0x42, 0x37, 0x40, 0x46, 0x46, 0x3f, 0x41, 0x45, 0x42, 0x3d, + 0x43, 0x38, 0x3e, 0x38, 0x3c, 0x39, 0x40, 0x38, 0x37, 0x36, 0x3d, 0x3d, + 0x38, 0x47, 0x45, 0x3b, 0x45, 0x44, 0x42, 0x2e, 0x37, 0x40, 0x42, 0x42, + 0x3c, 0x36, 0x3b, 0x39, 0x44, 0x4d, 0x42, 0x3f, 0x3a, 0x3e, 0x45, 0x34, + 0x3c, 0x43, 0x47, 0x43, 0x3f, 0x48, 0x3b, 0x44, 0x3d, 0x44, 0x43, 0x3e, + 0x40, 0x4a, 0x31, 0x42, 0x42, 0x43, 0x48, 0x45, 0x3a, 
0x42, 0x36, 0x2f, + 0x3c, 0x3e, 0x3b, 0x3b, 0x44, 0x3f, 0x3a, 0x2c, 0x47, 0x3f, 0x4a, 0x40, + 0x40, 0x40, 0x3c, 0x2a, 0x3e, 0x44, 0x40, 0x43, 0x3a, 0x42, 0x39, 0x34, + 0x49, 0x3e, 0x36, 0x42, 0x3f, 0x42, 0x33, 0x3b, 0x3c, 0x45, 0x39, 0x3f, + 0x3e, 0x3f, 0x41, 0x3d, 0x32, 0x3b, 0x31, 0x40, 0x3f, 0x44, 0x3c, 0x3f, + 0x40, 0x46, 0x45, 0x36, 0x36, 0x42, 0x30, 0x57, 0x47, 0x44, 0x48, 0x3f, + 0x35, 0x37, 0x3f, 0x3f, 0x38, 0x4a, 0x41, 0x46, 0x50, 0x3d, 0x5b, 0x41, + 0x3e, 0x3c, 0x4a, 0x54, 0x45, 0x41, 0x5b, 0x46, 0x3d, 0x3b, 0x43, 0x33, + 0x45, 0x4e, 0x43, 0x3b, 0x44, 0x37, 0x37, 0x32, 0x4c, 0x3d, 0x4c, 0x3f, + 0x49, 0x3b, 0x37, 0x3a, 0x33, 0x43, 0x3f, 0x40, 0x44, 0x36, 0x3b, 0x44, + 0x45, 0x40, 0x3c, 0x3c, 0x41, 0x44, 0x3b, 0x3d, 0x33, 0x37, 0x3c, 0x35, + 0x3d, 0x3f, 0x39, 0x38, 0x33, 0x43, 0x3e, 0x39, 0x3b, 0x3e, 0x41, 0x35, + 0x40, 0x46, 0x43, 0x35, 0x41, 0x3d, 0x32, 0x39, 0x3c, 0x40, 0x3e, 0x3f, + 0x42, 0x38, 0x3b, 0x45, 0x3a, 0x3d, 0x40, 0x36, 0x3a, 0x40, 0x46, 0x44, + 0x48, 0x45, 0x3f, 0x3a, 0x45, 0x45, 0x3c, 0x3b, 0x40, 0x4c, 0x39, 0x3a, + 0x38, 0x39, 0x46, 0x3a, 0x3e, 0x4b, 0x34, 0x39, 0x3d, 0x3f, 0x40, 0x39, + 0x45, 0x31, 0x45, 0x29, 0x3f, 0x38, 0x3a, 0x3f, 0x38, 0x3b, 0x36, 0x2d, + 0x43, 0x3d, 0x45, 0x3c, 0x46, 0x3f, 0x40, 0x3c, 0x3a, 0x3e, 0x3d, 0x38, + 0x3f, 0x3c, 0x3f, 0x42, 0x35, 0x3f, 0x3a, 0x43, 0x3d, 0x43, 0x3d, 0x33, + 0x3d, 0x48, 0x42, 0x3d, 0x45, 0x46, 0x3d, 0x35, 0x32, 0x44, 0x42, 0x37, + 0x3d, 0x40, 0x3c, 0x47, 0x4a, 0x45, 0x47, 0x2f, 0x33, 0x36, 0x3f, 0x42, + 0x38, 0x43, 0x3e, 0x3a, 0x41, 0x3f, 0x5f, 0x3f, 0x48, 0x3a, 0x44, 0x47, + 0x41, 0x3e, 0x57, 0x42, 0x41, 0x33, 0x34, 0x39, 0x42, 0x44, 0x42, 0x3c, + 0x49, 0x34, 0x37, 0x33, 0x47, 0x38, 0x43, 0x3d, 0x43, 0x3e, 0x3e, 0x36, + 0x41, 0x41, 0x37, 0x40, 0x39, 0x3e, 0x3b, 0x3b, 0x3e, 0x41, 0x3d, 0x3b, + 0x43, 0x3e, 0x39, 0x43, 0x2f, 0x3e, 0x33, 0x40, 0x45, 0x47, 0x30, 0x46, + 0x3f, 0x3f, 0x37, 0x42, 0x3d, 0x42, 0x43, 0x37, 0x38, 0x3c, 0x35, 0x34, + 0x41, 0x43, 0x3e, 0x3e, 0x3f, 0x49, 0x35, 0x35, 0x38, 
0x36, 0x3a, 0x43, + 0x38, 0x46, 0x48, 0x36, 0x3f, 0x39, 0x3b, 0x3e, 0x48, 0x47, 0x41, 0x34, + 0x3b, 0x3c, 0x37, 0x3e, 0x40, 0x41, 0x3b, 0x3d, 0x43, 0x42, 0x3a, 0x39, + 0x3b, 0x43, 0x38, 0x2b, 0x43, 0x41, 0x48, 0x35, 0x44, 0x44, 0x3e, 0x2c, + 0x46, 0x40, 0x3e, 0x41, 0x38, 0x34, 0x35, 0x37, 0x34, 0x3f, 0x3d, 0x46, + 0x33, 0x3c, 0x3c, 0x2e, 0x3b, 0x45, 0x3d, 0x3e, 0x3a, 0x42, 0x3c, 0x36, + 0x3a, 0x42, 0x39, 0x43, 0x35, 0x39, 0x40, 0x44, 0x47, 0x41, 0x44, 0x3d, + 0x41, 0x3e, 0x38, 0x39, 0x45, 0x3a, 0x35, 0x43, 0x3f, 0x44, 0x41, 0x49, + 0x47, 0x3f, 0x44, 0x40, 0x38, 0x43, 0x40, 0x3e, 0x39, 0x42, 0x32, 0x3b, + 0x42, 0x47, 0x57, 0x37, 0x36, 0x38, 0x43, 0x49, 0x3b, 0x34, 0x54, 0x42, + 0x3d, 0x3f, 0x3e, 0x3b, 0x38, 0x41, 0x43, 0x3a, 0x44, 0x39, 0x34, 0x2c, + 0x38, 0x43, 0x4b, 0x3f, 0x40, 0x3e, 0x32, 0x33, 0x3d, 0x44, 0x45, 0x44, + 0x3e, 0x35, 0x37, 0x39, 0x40, 0x3e, 0x40, 0x3c, 0x34, 0x43, 0x37, 0x40, + 0x39, 0x3e, 0x3d, 0x43, 0x3a, 0x44, 0x43, 0x44, 0x3d, 0x3b, 0x45, 0x3b, + 0x3a, 0x3a, 0x3f, 0x37, 0x43, 0x3b, 0x33, 0x35, 0x40, 0x47, 0x3e, 0x3c, + 0x39, 0x3c, 0x34, 0x29, 0x3c, 0x3e, 0x46, 0x3e, 0x3c, 0x38, 0x3f, 0x2d, + 0x3d, 0x3d, 0x3f, 0x3f, 0x3d, 0x45, 0x3b, 0x32, 0x39, 0x3f, 0x41, 0x38, + 0x36, 0x3e, 0x3a, 0x35, 0x40, 0x3f, 0x3b, 0x32, 0x3c, 0x39, 0x3e, 0x35, + 0x3e, 0x45, 0x34, 0x38, 0x44, 0x39, 0x3f, 0x31, 0x34, 0x39, 0x3f, 0x38, + 0x44, 0x42, 0x3f, 0x3b, 0x39, 0x3d, 0x39, 0x3b, 0x44, 0x46, 0x38, 0x3d, + 0x45, 0x37, 0x40, 0x3a, 0x3a, 0x39, 0x35, 0x3c, 0x39, 0x40, 0x47, 0x3e, + 0x38, 0x42, 0x41, 0x3b, 0x48, 0x3f, 0x3a, 0x3e, 0x3d, 0x3f, 0x32, 0x3b, + 0x3f, 0x3d, 0x3e, 0x44, 0x43, 0x41, 0x44, 0x47, 0x48, 0x41, 0x41, 0x36, + 0x3a, 0x33, 0x3c, 0x3c, 0x37, 0x3e, 0x40, 0x34, 0x3f, 0x42, 0x53, 0x40, + 0x3f, 0x35, 0x3e, 0x46, 0x3a, 0x3e, 0x4b, 0x41, 0x46, 0x32, 0x39, 0x36, + 0x3b, 0x4f, 0x36, 0x3c, 0x40, 0x3a, 0x40, 0x40, 0x47, 0x3e, 0x49, 0x37, + 0x3f, 0x31, 0x3e, 0x40, 0x3b, 0x3f, 0x43, 0x44, 0x3a, 0x3d, 0x31, 0x41, + 0x41, 0x33, 0x43, 0x40, 0x3c, 0x3a, 0x41, 0x40, 0x37, 
0x3f, 0x34, 0x3e, + 0x44, 0x42, 0x3d, 0x3f, 0x3f, 0x34, 0x36, 0x34, 0x31, 0x41, 0x32, 0x39, + 0x3e, 0x3d, 0x42, 0x35, 0x3e, 0x3a, 0x41, 0x47, 0x3d, 0x42, 0x33, 0x32, + 0x43, 0x42, 0x36, 0x41, 0x3e, 0x39, 0x46, 0x39, 0x35, 0x3d, 0x3d, 0x40, + 0x38, 0x44, 0x3d, 0x31, 0x44, 0x39, 0x3a, 0x45, 0x42, 0x41, 0x3d, 0x36, + 0x3f, 0x3c, 0x39, 0x3d, 0x32, 0x39, 0x42, 0x34, 0x3f, 0x38, 0x44, 0x3c, + 0x43, 0x45, 0x41, 0x2d, 0x44, 0x42, 0x3d, 0x3f, 0x44, 0x38, 0x3d, 0x35, + 0x3a, 0x48, 0x40, 0x3b, 0x3d, 0x36, 0x3b, 0x40, 0x3f, 0x3a, 0x3a, 0x3f, + 0x3c, 0x33, 0x39, 0x3c, 0x3c, 0x38, 0x47, 0x36, 0x3d, 0x41, 0x46, 0x41, + 0x34, 0x46, 0x48, 0x46, 0x3d, 0x3c, 0x40, 0x43, 0x3d, 0x41, 0x37, 0x3e, + 0x39, 0x47, 0x3f, 0x39, 0x46, 0x43, 0x3f, 0x41, 0x45, 0x37, 0x40, 0x3a, + 0x3d, 0x44, 0x3f, 0x3b, 0x3b, 0x40, 0x4f, 0x3d, 0x3d, 0x41, 0x3c, 0x43, + 0x3e, 0x46, 0x4e, 0x40, 0x3f, 0x34, 0x48, 0x29, 0x45, 0x44, 0x46, 0x41, + 0x45, 0x32, 0x3e, 0x38, 0x39, 0x3a, 0x3e, 0x3e, 0x4c, 0x34, 0x3c, 0x40, + 0x4a, 0x44, 0x3d, 0x46, 0x3b, 0x3e, 0x42, 0x42, 0x3a, 0x41, 0x43, 0x41, + 0x39, 0x3f, 0x3e, 0x3c, 0x36, 0x48, 0x3f, 0x3e, 0x3e, 0x37, 0x3f, 0x3f, + 0x3b, 0x40, 0x3e, 0x35, 0x32, 0x35, 0x3f, 0x33, 0x3f, 0x38, 0x43, 0x37, + 0x49, 0x38, 0x37, 0x3c, 0x3c, 0x40, 0x40, 0x3a, 0x3a, 0x46, 0x37, 0x34, + 0x34, 0x3b, 0x3d, 0x2f, 0x3a, 0x38, 0x3d, 0x46, 0x3d, 0x3b, 0x3d, 0x38, + 0x35, 0x37, 0x44, 0x3c, 0x3d, 0x3e, 0x40, 0x3a, 0x40, 0x33, 0x3e, 0x38, + 0x40, 0x3e, 0x45, 0x37, 0x3f, 0x3b, 0x3c, 0x40, 0x3b, 0x3c, 0x3b, 0x33, + 0x41, 0x3f, 0x3b, 0x42, 0x31, 0x3b, 0x3a, 0x39, 0x3d, 0x41, 0x39, 0x40, + 0x43, 0x45, 0x39, 0x3b, 0x3a, 0x42, 0x43, 0x3d, 0x3f, 0x40, 0x47, 0x39, + 0x37, 0x3f, 0x47, 0x3f, 0x45, 0x41, 0x39, 0x3a, 0x41, 0x38, 0x3c, 0x3c, + 0x39, 0x40, 0x39, 0x3b, 0x3b, 0x3e, 0x38, 0x3b, 0x37, 0x48, 0x41, 0x3f, + 0x3e, 0x37, 0x3d, 0x44, 0x3c, 0x3e, 0x40, 0x39, 0x41, 0x42, 0x3d, 0x45, + 0x3b, 0x3e, 0x4c, 0x3b, 0x3a, 0x3a, 0x3e, 0x47, 0x3c, 0x3f, 0x48, 0x3f, + 0x46, 0x3f, 0x39, 0x25, 0x44, 0x3a, 0x3b, 0x40, 0x41, 
0x39, 0x39, 0x47, + 0x3b, 0x32, 0x49, 0x42, 0x41, 0x3a, 0x43, 0x41, 0x3e, 0x35, 0x37, 0x3d, + 0x49, 0x40, 0x45, 0x3b, 0x3c, 0x38, 0x48, 0x3c, 0x3c, 0x35, 0x3f, 0x41, + 0x41, 0x4c, 0x36, 0x39, 0x37, 0x3d, 0x3b, 0x3e, 0x44, 0x32, 0x3d, 0x3f, + 0x3a, 0x3b, 0x3a, 0x47, 0x38, 0x42, 0x36, 0x34, 0x43, 0x3f, 0x3e, 0x40, + 0x34, 0x31, 0x36, 0x33, 0x42, 0x37, 0x41, 0x41, 0x40, 0x3d, 0x3d, 0x37, + 0x43, 0x3a, 0x3e, 0x44, 0x43, 0x3c, 0x35, 0x38, 0x38, 0x3c, 0x43, 0x36, + 0x3a, 0x38, 0x40, 0x3f, 0x3d, 0x3e, 0x37, 0x3b, 0x41, 0x3a, 0x3b, 0x3d, + 0x3c, 0x41, 0x3c, 0x41, 0x47, 0x3f, 0x3f, 0x3b, 0x3d, 0x3f, 0x3b, 0x45, + 0x38, 0x38, 0x40, 0x38, 0x46, 0x42, 0x39, 0x3d, 0x3d, 0x3b, 0x42, 0x36, + 0x42, 0x41, 0x3e, 0x3e, 0x36, 0x3f, 0x37, 0x3f, 0x36, 0x48, 0x3b, 0x39, + 0x3d, 0x3f, 0x43, 0x3e, 0x3c, 0x40, 0x48, 0x46, 0x43, 0x36, 0x42, 0x39, + 0x46, 0x3c, 0x37, 0x38, 0x49, 0x37, 0x36, 0x39, 0x3e, 0x42, 0x48, 0x3a, + 0x3c, 0x3e, 0x42, 0x30, 0x3e, 0x34, 0x39, 0x3b, 0x46, 0x61, 0x46, 0x1e, + 0x4c, 0x3b, 0x40, 0x2d, 0x3c, 0x42, 0x32, 0x30, 0x49, 0x3e, 0x39, 0x34, + 0x30, 0x40, 0x31, 0x38, 0x40, 0x3d, 0x3c, 0x35, 0x3a, 0x36, 0x40, 0x3b, + 0x41, 0x40, 0x3b, 0x39, 0x37, 0x37, 0x3f, 0x3b, 0x3c, 0x3a, 0x40, 0x3a, + 0x36, 0x3c, 0x42, 0x39, 0x3e, 0x36, 0x40, 0x42, 0x39, 0x40, 0x3b, 0x34, + 0x37, 0x33, 0x36, 0x3f, 0x43, 0x33, 0x33, 0x27, 0x3d, 0x46, 0x40, 0x31, + 0x38, 0x3e, 0x41, 0x20, 0x3f, 0x39, 0x42, 0x35, 0x35, 0x45, 0x40, 0x1e, + 0x32, 0x35, 0x32, 0x3c, 0x35, 0x44, 0x46, 0x29, 0x3a, 0x3d, 0x37, 0x42, + 0x3b, 0x45, 0x3a, 0x26, 0x38, 0x40, 0x30, 0x37, 0x41, 0x40, 0x39, 0x2b, + 0x49, 0x3f, 0x43, 0x43, 0x40, 0x3a, 0x38, 0x29, 0x43, 0x3a, 0x37, 0x40, + 0x3f, 0x35, 0x3a, 0x28, 0x36, 0x3e, 0x3f, 0x43, 0x3c, 0x39, 0x42, 0x2c, + 0x38, 0x42, 0x38, 0x3d, 0x42, 0x38, 0x35, 0x2d, 0x34, 0x38, 0x3d, 0x43, + 0x46, 0x3e, 0x3c, 0x27, 0x3e, 0x40, 0x46, 0x39, 0x35, 0x3d, 0x42, 0x35, + 0x42, 0x36, 0x40, 0x3e, 0x3a, 0x3e, 0x3c, 0x37, 0x3a, 0x3c, 0x48, 0x48, + 0x48, 0x37, 0x3d, 0x38, 0x4b, 0x40, 0x43, 0x3b, 0x41, 
0x46, 0x3c, 0x34, + 0x46, 0x3c, 0x3c, 0x3c, 0x4b, 0x64, 0x4a, 0x22, 0x52, 0x41, 0x42, 0x3b, + 0x42, 0x4a, 0x34, 0x37, 0x4b, 0x44, 0x3b, 0x4a, 0x38, 0x3f, 0x38, 0x3a, + 0x40, 0x41, 0x42, 0x3c, 0x33, 0x3e, 0x3c, 0x42, 0x2c, 0x4e, 0x47, 0x3f, + 0x38, 0x33, 0x39, 0x3f, 0x3b, 0x45, 0x37, 0x3a, 0x42, 0x42, 0x44, 0x3f, + 0x3c, 0x3c, 0x3e, 0x3d, 0x3c, 0x3c, 0x40, 0x2c, 0x3c, 0x3d, 0x42, 0x39, + 0x3a, 0x37, 0x43, 0x2a, 0x3d, 0x40, 0x41, 0x41, 0x46, 0x46, 0x42, 0x28, + 0x39, 0x3c, 0x37, 0x44, 0x46, 0x41, 0x47, 0x2b, 0x44, 0x33, 0x39, 0x3f, + 0x3f, 0x43, 0x3d, 0x23, 0x3a, 0x43, 0x41, 0x3b, 0x41, 0x42, 0x33, 0x1f, + 0x43, 0x3e, 0x3d, 0x40, 0x37, 0x33, 0x42, 0x28, 0x3b, 0x38, 0x37, 0x3c, + 0x34, 0x40, 0x44, 0x2a, 0x3c, 0x3a, 0x41, 0x37, 0x45, 0x3f, 0x3e, 0x26, + 0x41, 0x40, 0x35, 0x3d, 0x45, 0x3e, 0x3d, 0x29, 0x3c, 0x39, 0x3f, 0x3c, + 0x3d, 0x39, 0x38, 0x2d, 0x39, 0x38, 0x38, 0x44, 0x3c, 0x3e, 0x38, 0x26, + 0x40, 0x36, 0x39, 0x38, 0x3f, 0x32, 0x39, 0x35, 0x3d, 0x3e, 0x35, 0x3a, + 0x3f, 0x3f, 0x31, 0x35, 0x34, 0x45, 0x3e, 0x43, 0x48, 0x3b, 0x37, 0x39, + 0x4d, 0x46, 0x54, 0x40, 0x41, 0x4e, 0x3d, 0x38, 0x4d, 0x38, 0x3a, 0x3b, + 0x49, 0x5a, 0x4a, 0x1e, 0x5e, 0x39, 0x38, 0x37, 0x3a, 0x51, 0x3a, 0x3c, + 0x50, 0x3f, 0x40, 0x42, 0x33, 0x3b, 0x2e, 0x4a, 0x3f, 0x4a, 0x3b, 0x43, + 0x36, 0x3e, 0x3d, 0x42, 0x39, 0x46, 0x4b, 0x3c, 0x3b, 0x3b, 0x35, 0x3e, + 0x3d, 0x4b, 0x3f, 0x41, 0x3f, 0x3b, 0x42, 0x42, 0x38, 0x3a, 0x41, 0x3d, + 0x36, 0x41, 0x37, 0x2f, 0x38, 0x37, 0x3f, 0x34, 0x35, 0x35, 0x45, 0x30, + 0x31, 0x42, 0x31, 0x3a, 0x3a, 0x3e, 0x3d, 0x23, 0x3f, 0x43, 0x3b, 0x41, + 0x35, 0x3b, 0x40, 0x25, 0x45, 0x3e, 0x42, 0x3b, 0x31, 0x40, 0x36, 0x28, + 0x43, 0x42, 0x30, 0x42, 0x32, 0x32, 0x36, 0x2c, 0x35, 0x3a, 0x3d, 0x3a, + 0x3c, 0x36, 0x3e, 0x30, 0x41, 0x42, 0x38, 0x41, 0x41, 0x3e, 0x3c, 0x23, + 0x37, 0x40, 0x3c, 0x3e, 0x3e, 0x3a, 0x37, 0x2b, 0x36, 0x40, 0x41, 0x42, + 0x3e, 0x38, 0x44, 0x22, 0x46, 0x38, 0x33, 0x3b, 0x3a, 0x3a, 0x3a, 0x24, + 0x36, 0x3b, 0x38, 0x44, 0x34, 0x38, 0x40, 0x28, 0x38, 
0x3d, 0x36, 0x44, + 0x31, 0x3e, 0x37, 0x37, 0x36, 0x3f, 0x47, 0x38, 0x3b, 0x3e, 0x2c, 0x4c, + 0x36, 0x3c, 0x3b, 0x41, 0x4c, 0x3d, 0x3d, 0x40, 0x49, 0x44, 0x52, 0x3f, + 0x3b, 0x4d, 0x3c, 0x3a, 0x4f, 0x3b, 0x36, 0x3b, 0x4a, 0x5f, 0x4e, 0x1f, + 0x57, 0x3c, 0x3d, 0x3d, 0x46, 0x59, 0x42, 0x45, 0x52, 0x3d, 0x3a, 0x41, + 0x31, 0x39, 0x39, 0x4f, 0x43, 0x4e, 0x3e, 0x37, 0x3a, 0x37, 0x33, 0x47, + 0x32, 0x45, 0x47, 0x43, 0x31, 0x33, 0x38, 0x43, 0x3e, 0x47, 0x3d, 0x32, + 0x3b, 0x39, 0x3c, 0x42, 0x3d, 0x47, 0x42, 0x40, 0x3d, 0x3f, 0x3c, 0x34, + 0x3b, 0x3e, 0x42, 0x3d, 0x43, 0x35, 0x42, 0x2c, 0x35, 0x3d, 0x3c, 0x3d, + 0x3a, 0x3c, 0x46, 0x25, 0x43, 0x35, 0x3d, 0x39, 0x3a, 0x3c, 0x40, 0x2b, + 0x33, 0x40, 0x3d, 0x46, 0x45, 0x37, 0x3c, 0x36, 0x43, 0x37, 0x3e, 0x3a, + 0x3c, 0x47, 0x3f, 0x38, 0x36, 0x3e, 0x3a, 0x42, 0x3c, 0x42, 0x33, 0x39, + 0x3c, 0x3a, 0x3c, 0x40, 0x48, 0x3b, 0x40, 0x32, 0x37, 0x47, 0x34, 0x38, + 0x33, 0x3d, 0x49, 0x2d, 0x36, 0x42, 0x3d, 0x3e, 0x47, 0x3c, 0x42, 0x2c, + 0x3b, 0x31, 0x3f, 0x3c, 0x3d, 0x3c, 0x3f, 0x2b, 0x41, 0x35, 0x33, 0x43, + 0x47, 0x39, 0x34, 0x2a, 0x3a, 0x3a, 0x40, 0x3d, 0x44, 0x3c, 0x39, 0x34, + 0x43, 0x40, 0x33, 0x3a, 0x3b, 0x42, 0x38, 0x3b, 0x34, 0x35, 0x40, 0x43, + 0x4b, 0x41, 0x3d, 0x38, 0x49, 0x44, 0x4d, 0x37, 0x3a, 0x4b, 0x40, 0x39, + 0x4e, 0x3b, 0x30, 0x38, 0x47, 0x5d, 0x50, 0x1f, 0x54, 0x35, 0x3a, 0x39, + 0x40, 0x4c, 0x46, 0x42, 0x52, 0x39, 0x39, 0x45, 0x41, 0x3c, 0x30, 0x5b, + 0x43, 0x4d, 0x4a, 0x3e, 0x31, 0x39, 0x41, 0x4c, 0x36, 0x44, 0x4c, 0x39, + 0x32, 0x41, 0x47, 0x3e, 0x34, 0x49, 0x45, 0x3b, 0x34, 0x3a, 0x3b, 0x47, + 0x43, 0x3e, 0x43, 0x32, 0x40, 0x3e, 0x3e, 0x38, 0x37, 0x3e, 0x37, 0x3a, + 0x3a, 0x40, 0x48, 0x2f, 0x3e, 0x3e, 0x46, 0x3a, 0x3e, 0x35, 0x49, 0x30, + 0x3a, 0x41, 0x3e, 0x39, 0x34, 0x45, 0x3d, 0x34, 0x48, 0x43, 0x43, 0x42, + 0x33, 0x39, 0x3b, 0x3f, 0x30, 0x46, 0x41, 0x39, 0x48, 0x3a, 0x3c, 0x3e, + 0x3f, 0x36, 0x40, 0x3d, 0x43, 0x40, 0x3e, 0x39, 0x44, 0x40, 0x44, 0x3b, + 0x43, 0x42, 0x39, 0x38, 0x3a, 0x3f, 0x3b, 0x3f, 0x38, 
0x3d, 0x34, 0x30, + 0x34, 0x3d, 0x3f, 0x42, 0x44, 0x3e, 0x34, 0x32, 0x37, 0x46, 0x44, 0x38, + 0x3c, 0x45, 0x39, 0x2b, 0x41, 0x3c, 0x40, 0x40, 0x3a, 0x3a, 0x3c, 0x32, + 0x45, 0x42, 0x3d, 0x46, 0x38, 0x3b, 0x34, 0x35, 0x38, 0x43, 0x3d, 0x34, + 0x42, 0x3b, 0x38, 0x3d, 0x37, 0x43, 0x3f, 0x39, 0x4e, 0x39, 0x40, 0x3f, + 0x4d, 0x43, 0x49, 0x3f, 0x36, 0x41, 0x44, 0x39, 0x48, 0x3a, 0x35, 0x39, + 0x48, 0x59, 0x4e, 0x25, 0x58, 0x39, 0x42, 0x35, 0x43, 0x4e, 0x42, 0x3f, + 0x4a, 0x43, 0x3b, 0x3f, 0x3b, 0x37, 0x2b, 0x5a, 0x3d, 0x44, 0x3b, 0x40, + 0x31, 0x38, 0x37, 0x44, 0x32, 0x3e, 0x41, 0x3d, 0x2c, 0x42, 0x42, 0x3c, + 0x37, 0x45, 0x41, 0x41, 0x3d, 0x39, 0x41, 0x40, 0x3a, 0x46, 0x41, 0x40, + 0x40, 0x3d, 0x38, 0x31, 0x37, 0x3f, 0x42, 0x38, 0x3f, 0x3c, 0x48, 0x30, + 0x3e, 0x39, 0x3f, 0x3d, 0x3d, 0x44, 0x52, 0x35, 0x3b, 0x32, 0x42, 0x32, + 0x3a, 0x43, 0x39, 0x3b, 0x31, 0x43, 0x36, 0x3c, 0x3c, 0x3c, 0x41, 0x45, + 0x42, 0x49, 0x41, 0x3b, 0x42, 0x3e, 0x41, 0x44, 0x36, 0x41, 0x3f, 0x3c, + 0x3e, 0x47, 0x45, 0x41, 0x38, 0x41, 0x3f, 0x43, 0x35, 0x32, 0x41, 0x39, + 0x36, 0x47, 0x35, 0x42, 0x44, 0x3b, 0x3f, 0x34, 0x48, 0x41, 0x43, 0x42, + 0x36, 0x3e, 0x3c, 0x3d, 0x3d, 0x3b, 0x42, 0x44, 0x3a, 0x44, 0x36, 0x2a, + 0x41, 0x39, 0x3a, 0x41, 0x46, 0x3c, 0x44, 0x2f, 0x36, 0x39, 0x3b, 0x3f, + 0x38, 0x45, 0x3c, 0x3c, 0x3e, 0x41, 0x3c, 0x39, 0x3e, 0x40, 0x2f, 0x45, + 0x3b, 0x41, 0x40, 0x3c, 0x4e, 0x38, 0x3e, 0x48, 0x46, 0x40, 0x48, 0x44, + 0x40, 0x4a, 0x45, 0x3c, 0x4f, 0x39, 0x37, 0x3a, 0x4e, 0x59, 0x5c, 0x22, + 0x58, 0x32, 0x38, 0x34, 0x40, 0x4b, 0x43, 0x43, 0x4f, 0x3e, 0x39, 0x40, + 0x37, 0x3e, 0x2f, 0x55, 0x3f, 0x40, 0x38, 0x3f, 0x3a, 0x33, 0x37, 0x3d, + 0x34, 0x4c, 0x37, 0x3f, 0x32, 0x39, 0x45, 0x34, 0x44, 0x4c, 0x3f, 0x3b, + 0x3c, 0x36, 0x36, 0x43, 0x36, 0x47, 0x41, 0x46, 0x41, 0x3e, 0x41, 0x3a, + 0x43, 0x3a, 0x48, 0x42, 0x42, 0x3e, 0x4c, 0x36, 0x3d, 0x39, 0x43, 0x46, + 0x3d, 0x42, 0x42, 0x3b, 0x45, 0x43, 0x3c, 0x40, 0x39, 0x37, 0x34, 0x45, + 0x3f, 0x40, 0x34, 0x38, 0x43, 0x3f, 0x36, 0x47, 0x3f, 
0x3b, 0x49, 0x3c, + 0x3a, 0x3a, 0x42, 0x4c, 0x37, 0x3e, 0x3b, 0x32, 0x47, 0x40, 0x45, 0x4d, + 0x39, 0x3b, 0x39, 0x40, 0x3e, 0x3c, 0x3d, 0x3a, 0x3d, 0x3b, 0x3e, 0x43, + 0x3e, 0x3f, 0x3a, 0x3c, 0x41, 0x40, 0x39, 0x3c, 0x3a, 0x38, 0x39, 0x37, + 0x36, 0x33, 0x43, 0x45, 0x3f, 0x45, 0x41, 0x30, 0x3b, 0x34, 0x3c, 0x39, + 0x3b, 0x45, 0x37, 0x2e, 0x36, 0x34, 0x36, 0x44, 0x3d, 0x40, 0x3a, 0x3c, + 0x3d, 0x3b, 0x38, 0x41, 0x42, 0x3a, 0x32, 0x4b, 0x38, 0x3e, 0x41, 0x46, + 0x57, 0x3a, 0x44, 0x48, 0x47, 0x45, 0x47, 0x3e, 0x43, 0x42, 0x45, 0x3b, + 0x50, 0x39, 0x37, 0x3f, 0x47, 0x51, 0x5e, 0x22, 0x59, 0x33, 0x3c, 0x37, + 0x43, 0x50, 0x49, 0x47, 0x46, 0x42, 0x39, 0x44, 0x44, 0x3d, 0x2f, 0x53, + 0x35, 0x41, 0x40, 0x3d, 0x2d, 0x35, 0x2f, 0x3e, 0x3f, 0x37, 0x38, 0x3e, + 0x30, 0x45, 0x46, 0x38, 0x33, 0x3c, 0x3e, 0x3b, 0x44, 0x42, 0x47, 0x49, + 0x43, 0x40, 0x3d, 0x3c, 0x38, 0x43, 0x3e, 0x38, 0x3d, 0x40, 0x36, 0x43, + 0x43, 0x3e, 0x40, 0x3c, 0x44, 0x47, 0x43, 0x3d, 0x41, 0x39, 0x3e, 0x45, + 0x39, 0x3d, 0x39, 0x40, 0x42, 0x40, 0x3b, 0x4a, 0x40, 0x41, 0x3f, 0x37, + 0x43, 0x41, 0x37, 0x4c, 0x3f, 0x3d, 0x38, 0x3a, 0x42, 0x46, 0x43, 0x4d, + 0x3c, 0x3a, 0x43, 0x3e, 0x3b, 0x3d, 0x46, 0x4a, 0x38, 0x3d, 0x3d, 0x39, + 0x3e, 0x3c, 0x3b, 0x3e, 0x3a, 0x40, 0x40, 0x34, 0x41, 0x3f, 0x3e, 0x3f, + 0x47, 0x3c, 0x32, 0x3a, 0x3c, 0x44, 0x3f, 0x42, 0x41, 0x43, 0x3e, 0x3a, + 0x3b, 0x42, 0x41, 0x39, 0x39, 0x37, 0x39, 0x3e, 0x3d, 0x33, 0x3e, 0x35, + 0x44, 0x37, 0x40, 0x35, 0x3f, 0x47, 0x37, 0x41, 0x35, 0x38, 0x47, 0x40, + 0x43, 0x44, 0x2e, 0x48, 0x35, 0x44, 0x41, 0x3c, 0x47, 0x3d, 0x3d, 0x52, + 0x48, 0x41, 0x44, 0x41, 0x42, 0x4b, 0x3e, 0x3d, 0x4e, 0x32, 0x34, 0x47, + 0x55, 0x57, 0x5f, 0x22, 0x57, 0x33, 0x40, 0x37, 0x40, 0x4a, 0x4d, 0x47, + 0x48, 0x38, 0x3e, 0x46, 0x37, 0x42, 0x28, 0x57, 0x38, 0x42, 0x36, 0x43, + 0x35, 0x37, 0x39, 0x39, 0x42, 0x39, 0x38, 0x3c, 0x35, 0x3c, 0x3c, 0x3a, + 0x3c, 0x4c, 0x45, 0x3f, 0x43, 0x3d, 0x45, 0x45, 0x40, 0x47, 0x3e, 0x3e, + 0x3d, 0x4b, 0x49, 0x35, 0x43, 0x3c, 0x36, 0x46, 0x3c, 
0x46, 0x42, 0x44, + 0x3c, 0x42, 0x3d, 0x42, 0x44, 0x3c, 0x4a, 0x40, 0x40, 0x3c, 0x3b, 0x3c, + 0x35, 0x34, 0x2e, 0x46, 0x38, 0x3d, 0x38, 0x44, 0x41, 0x40, 0x3c, 0x52, + 0x3b, 0x3d, 0x3b, 0x3f, 0x42, 0x47, 0x44, 0x52, 0x44, 0x44, 0x39, 0x3f, + 0x43, 0x35, 0x3c, 0x4d, 0x39, 0x3d, 0x3b, 0x37, 0x3e, 0x38, 0x3e, 0x49, + 0x3a, 0x37, 0x3c, 0x49, 0x40, 0x41, 0x3c, 0x40, 0x3d, 0x38, 0x39, 0x3f, + 0x44, 0x3e, 0x42, 0x3e, 0x47, 0x40, 0x34, 0x46, 0x48, 0x37, 0x45, 0x3e, + 0x46, 0x3f, 0x35, 0x39, 0x38, 0x3f, 0x36, 0x2c, 0x40, 0x38, 0x3e, 0x3c, + 0x32, 0x3c, 0x46, 0x3a, 0x3f, 0x41, 0x36, 0x49, 0x42, 0x38, 0x36, 0x43, + 0x3d, 0x41, 0x46, 0x35, 0x4f, 0x3a, 0x41, 0x5c, 0x4a, 0x42, 0x4e, 0x42, + 0x46, 0x54, 0x3f, 0x45, 0x4c, 0x30, 0x33, 0x44, 0x56, 0x5d, 0x68, 0x26, + 0x60, 0x33, 0x3e, 0x3a, 0x42, 0x49, 0x52, 0x47, 0x51, 0x46, 0x40, 0x47, + 0x41, 0x3b, 0x1b, 0x4f, 0x3c, 0x45, 0x3d, 0x3d, 0x32, 0x2f, 0x3e, 0x3c, + 0x3c, 0x3f, 0x3b, 0x3c, 0x2c, 0x3a, 0x41, 0x3c, 0x35, 0x3e, 0x3e, 0x3c, + 0x3d, 0x3f, 0x3e, 0x40, 0x40, 0x44, 0x42, 0x3c, 0x3c, 0x3c, 0x41, 0x3c, + 0x3c, 0x3d, 0x3e, 0x3d, 0x3c, 0x3d, 0x4a, 0x46, 0x3f, 0x35, 0x33, 0x43, + 0x42, 0x41, 0x4d, 0x48, 0x48, 0x44, 0x3e, 0x41, 0x41, 0x36, 0x3c, 0x4c, + 0x34, 0x47, 0x42, 0x39, 0x3e, 0x43, 0x3a, 0x53, 0x3b, 0x3b, 0x42, 0x3d, + 0x41, 0x3c, 0x3e, 0x52, 0x3a, 0x44, 0x34, 0x43, 0x3d, 0x3d, 0x3a, 0x50, + 0x3e, 0x33, 0x41, 0x40, 0x3f, 0x38, 0x43, 0x42, 0x3b, 0x37, 0x3e, 0x43, + 0x3f, 0x3c, 0x41, 0x49, 0x40, 0x32, 0x40, 0x3e, 0x3b, 0x3e, 0x44, 0x3c, + 0x35, 0x37, 0x3d, 0x41, 0x34, 0x3f, 0x3a, 0x3c, 0x47, 0x32, 0x41, 0x3d, + 0x3c, 0x3a, 0x4a, 0x31, 0x43, 0x38, 0x45, 0x37, 0x49, 0x3c, 0x34, 0x3f, + 0x3d, 0x3d, 0x3d, 0x45, 0x47, 0x3e, 0x37, 0x48, 0x40, 0x3b, 0x45, 0x3d, + 0x4e, 0x42, 0x3f, 0x57, 0x4b, 0x43, 0x4b, 0x3d, 0x3f, 0x47, 0x4a, 0x43, + 0x4e, 0x30, 0x38, 0x45, 0x59, 0x60, 0x64, 0x2d, 0x5a, 0x2d, 0x34, 0x35, + 0x47, 0x54, 0x4e, 0x3f, 0x44, 0x45, 0x3c, 0x43, 0x3d, 0x40, 0x1c, 0x5a, + 0x36, 0x3f, 0x3a, 0x39, 0x37, 0x3c, 0x32, 0x3b, 0x2d, 
0x4a, 0x42, 0x35, + 0x30, 0x41, 0x43, 0x3d, 0x3d, 0x45, 0x38, 0x36, 0x3e, 0x40, 0x3a, 0x4a, + 0x34, 0x3d, 0x44, 0x3c, 0x39, 0x3b, 0x52, 0x38, 0x40, 0x3b, 0x3f, 0x3f, + 0x35, 0x37, 0x46, 0x48, 0x38, 0x3b, 0x40, 0x36, 0x3d, 0x3a, 0x4f, 0x45, + 0x35, 0x3a, 0x35, 0x33, 0x37, 0x43, 0x42, 0x52, 0x37, 0x3b, 0x3d, 0x42, + 0x44, 0x3d, 0x48, 0x58, 0x33, 0x3f, 0x41, 0x44, 0x44, 0x3f, 0x3b, 0x52, + 0x47, 0x39, 0x32, 0x3b, 0x38, 0x35, 0x48, 0x50, 0x34, 0x30, 0x39, 0x43, + 0x42, 0x40, 0x3b, 0x4b, 0x43, 0x3d, 0x34, 0x44, 0x33, 0x39, 0x44, 0x4b, + 0x45, 0x3e, 0x3c, 0x3f, 0x3a, 0x3e, 0x3c, 0x45, 0x36, 0x3e, 0x3d, 0x40, + 0x43, 0x46, 0x37, 0x3d, 0x3b, 0x42, 0x43, 0x3f, 0x3a, 0x41, 0x48, 0x2f, + 0x3e, 0x39, 0x3a, 0x39, 0x3f, 0x3a, 0x41, 0x40, 0x40, 0x3c, 0x3b, 0x3b, + 0x3f, 0x40, 0x3e, 0x42, 0x38, 0x3f, 0x38, 0x3c, 0x49, 0x45, 0x3f, 0x62, + 0x55, 0x47, 0x4c, 0x3c, 0x3c, 0x4a, 0x4c, 0x46, 0x4f, 0x39, 0x3a, 0x3b, + 0x5e, 0x58, 0x6f, 0x2b, 0x5a, 0x2f, 0x3a, 0x35, 0x4b, 0x47, 0x4a, 0x46, + 0x45, 0x3e, 0x38, 0x4f, 0x3b, 0x3d, 0x21, 0x4b, 0x3d, 0x40, 0x37, 0x40, + 0x2d, 0x2c, 0x43, 0x3f, 0x2b, 0x3e, 0x3d, 0x39, 0x2f, 0x39, 0x44, 0x3c, + 0x39, 0x39, 0x43, 0x3b, 0x3d, 0x3b, 0x44, 0x39, 0x42, 0x42, 0x3e, 0x40, + 0x3b, 0x42, 0x53, 0x40, 0x32, 0x3d, 0x35, 0x3f, 0x3d, 0x45, 0x48, 0x46, + 0x3d, 0x43, 0x3c, 0x36, 0x35, 0x39, 0x3d, 0x4a, 0x39, 0x39, 0x3e, 0x41, + 0x38, 0x36, 0x3b, 0x53, 0x3c, 0x36, 0x32, 0x3b, 0x43, 0x3d, 0x42, 0x57, + 0x35, 0x2f, 0x38, 0x40, 0x2f, 0x3d, 0x3c, 0x4c, 0x40, 0x2f, 0x3a, 0x36, + 0x39, 0x3c, 0x3a, 0x51, 0x3d, 0x37, 0x39, 0x3c, 0x42, 0x40, 0x43, 0x52, + 0x3e, 0x42, 0x3e, 0x45, 0x36, 0x34, 0x42, 0x4b, 0x3a, 0x38, 0x37, 0x3f, + 0x36, 0x41, 0x3a, 0x45, 0x3e, 0x38, 0x35, 0x41, 0x35, 0x34, 0x37, 0x3c, + 0x3f, 0x31, 0x3c, 0x35, 0x33, 0x43, 0x36, 0x28, 0x44, 0x42, 0x3e, 0x42, + 0x3a, 0x41, 0x43, 0x35, 0x3d, 0x3f, 0x40, 0x3e, 0x3d, 0x33, 0x31, 0x41, + 0x3d, 0x40, 0x3b, 0x40, 0x51, 0x40, 0x3f, 0xfb, 0x51, 0x49, 0x4c, 0x3d, + 0x44, 0x4e, 0x47, 0x42, 0x50, 0x39, 0x39, 0x40, 0x59, 
0x5d, 0x70, 0x2c, + 0x59, 0x39, 0x38, 0x2f, 0x46, 0x50, 0x51, 0x47, 0x4c, 0x3c, 0x39, 0x48, + 0x44, 0x3a, 0x1a, 0x51, 0x35, 0x3e, 0x34, 0x3a, 0x3d, 0x2b, 0x41, 0x39, + 0x37, 0x4d, 0x3e, 0x43, 0x38, 0x3b, 0x3a, 0x35, 0x36, 0x3a, 0x43, 0x39, + 0x39, 0x3a, 0x46, 0x3b, 0x39, 0x3c, 0x46, 0x36, 0x3e, 0x3d, 0x4b, 0x3d, + 0x3b, 0x46, 0x3a, 0x41, 0x31, 0x3c, 0x44, 0x4a, 0x37, 0x42, 0x39, 0x43, + 0x43, 0x3e, 0x40, 0x47, 0x3c, 0x3e, 0x3b, 0x43, 0x34, 0x3a, 0x43, 0x53, + 0x3f, 0x37, 0x39, 0x37, 0x3e, 0x3b, 0x46, 0x59, 0x37, 0x37, 0x33, 0x3d, + 0x38, 0x42, 0x36, 0x58, 0x2e, 0x32, 0x2b, 0x45, 0x32, 0x33, 0x36, 0x50, + 0x41, 0x3f, 0x37, 0x3d, 0x3f, 0x3d, 0x46, 0x49, 0x41, 0x38, 0x33, 0x3d, + 0x33, 0x32, 0x3a, 0x49, 0x41, 0x41, 0x3d, 0x33, 0x3b, 0x3b, 0x3a, 0x46, + 0x34, 0x44, 0x3f, 0x3b, 0x2f, 0x3f, 0x32, 0x3c, 0x3f, 0x43, 0x3e, 0x45, + 0x3a, 0x3c, 0x43, 0x26, 0x46, 0x37, 0x38, 0x3e, 0x36, 0x31, 0x3e, 0x34, + 0x39, 0x3a, 0x38, 0x42, 0x38, 0x3e, 0x32, 0x42, 0x37, 0x37, 0x3c, 0x3a, + 0x48, 0x44, 0x3a, 0x68, 0x56, 0x46, 0x4d, 0x47, 0x40, 0x4e, 0x42, 0x46, + 0x51, 0x40, 0x38, 0x43, 0x58, 0x5d, 0x6a, 0x31, 0x57, 0x32, 0x3c, 0x36, + 0x49, 0x56, 0x52, 0x48, 0x4b, 0x41, 0x2f, 0x4d, 0x31, 0x43, 0x1b, 0x4c, + 0x30, 0x44, 0x33, 0x36, 0x2c, 0x3d, 0x45, 0x3a, 0x35, 0x46, 0x3d, 0x39, + 0x2e, 0x38, 0x3f, 0x37, 0x41, 0x44, 0x46, 0x31, 0x33, 0x46, 0x37, 0x37, + 0x3f, 0x41, 0x45, 0x30, 0x46, 0x3b, 0x50, 0x3b, 0x40, 0x39, 0x42, 0x43, + 0x35, 0x37, 0x40, 0x44, 0x3b, 0x41, 0x3d, 0x37, 0x3a, 0x41, 0x3d, 0x46, + 0x36, 0x41, 0x38, 0x41, 0x38, 0x3d, 0x45, 0x58, 0x3d, 0x3a, 0x3d, 0x44, + 0x45, 0x38, 0x48, 0x5c, 0x3d, 0x39, 0x43, 0x45, 0x41, 0x3e, 0x4a, 0x56, + 0x40, 0x33, 0x30, 0x31, 0x42, 0x39, 0x38, 0x56, 0x30, 0x3a, 0x35, 0x3e, + 0x3f, 0x38, 0x36, 0x47, 0x3c, 0x3a, 0x3d, 0x3f, 0x37, 0x35, 0x3b, 0x4d, + 0x43, 0x36, 0x39, 0x37, 0x3e, 0x42, 0x3d, 0x3f, 0x40, 0x3f, 0x34, 0x3b, + 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a, 0x3a, 0x3c, 0x34, 0x3f, 0x3c, 0x2a, + 0x49, 0x3b, 0x36, 0x3c, 0x35, 0x46, 0x38, 0x3b, 0x3c, 
0x39, 0x38, 0x42, + 0x39, 0x36, 0x2e, 0x4a, 0x3d, 0x39, 0x3f, 0x3f, 0x4b, 0x45, 0x3e, 0x67, + 0x4b, 0x4b, 0x49, 0x3e, 0x3f, 0x53, 0x4c, 0x55, 0x47, 0x32, 0x3b, 0x39, + 0x54, 0x5b, 0x6f, 0x29, 0x5a, 0x34, 0x3e, 0x26, 0x45, 0x52, 0x59, 0x44, + 0x59, 0x39, 0x3c, 0x47, 0x36, 0x46, 0x16, 0x50, 0x32, 0x46, 0x34, 0x35, + 0x35, 0x2d, 0x39, 0x38, 0x2c, 0x42, 0x43, 0x3b, 0x32, 0x3f, 0x37, 0x2f, + 0x34, 0x43, 0x46, 0x3b, 0x3b, 0x41, 0x3c, 0x37, 0x3e, 0x43, 0x4b, 0x36, + 0x3e, 0x3c, 0x4c, 0x42, 0x40, 0x3f, 0x49, 0x40, 0x3c, 0x40, 0x3c, 0x48, + 0x35, 0x42, 0x3f, 0x42, 0x44, 0x40, 0x45, 0x4f, 0x3f, 0x3f, 0x40, 0x42, + 0x3b, 0x3d, 0x49, 0x55, 0x42, 0x39, 0x41, 0x3b, 0x3f, 0x38, 0x44, 0x60, + 0x34, 0x40, 0x3b, 0x3b, 0x35, 0x3d, 0x41, 0x4e, 0x35, 0x33, 0x30, 0x3a, + 0x3a, 0x32, 0x42, 0x4f, 0x33, 0x34, 0x2f, 0x38, 0x49, 0x38, 0x40, 0x4c, + 0x35, 0x38, 0x3e, 0x46, 0x3f, 0x3a, 0x3a, 0x45, 0x3b, 0x34, 0x2e, 0x39, + 0x32, 0x3e, 0x40, 0x48, 0x35, 0x44, 0x3a, 0x34, 0x3f, 0x35, 0x3b, 0x32, + 0x40, 0x43, 0x3e, 0x38, 0x3b, 0x43, 0x3c, 0x2b, 0x46, 0x43, 0x40, 0x32, + 0x42, 0x3b, 0x49, 0x2e, 0x3b, 0x3a, 0x3e, 0x41, 0x3c, 0x3f, 0x31, 0x3b, + 0x41, 0x33, 0x41, 0x3c, 0x4d, 0x40, 0x38, 0x68, 0x4c, 0x4c, 0x4e, 0x3f, + 0x3f, 0x54, 0x4a, 0x3d, 0x4c, 0x33, 0x3b, 0x3a, 0x5d, 0x60, 0x71, 0x2b, + 0x59, 0x33, 0x3c, 0x2c, 0x47, 0x52, 0x4f, 0x51, 0x56, 0x3d, 0x39, 0x44, + 0x35, 0x41, 0x1b, 0x4a, 0x35, 0x41, 0x37, 0x35, 0x2c, 0x35, 0x37, 0x35, + 0x38, 0x41, 0x38, 0x3e, 0x3c, 0x40, 0x3c, 0x2f, 0x38, 0x3e, 0x3f, 0x45, + 0x40, 0x3d, 0x3c, 0x35, 0x3c, 0x46, 0x43, 0x39, 0x37, 0x42, 0x4e, 0x3c, + 0x42, 0x46, 0x37, 0x33, 0x43, 0x3f, 0x47, 0x4a, 0x3d, 0x3e, 0x40, 0x40, + 0x40, 0x3f, 0x4b, 0x54, 0x36, 0x3f, 0x37, 0x40, 0x39, 0x39, 0x47, 0x51, + 0x3d, 0x39, 0x36, 0x36, 0x40, 0x40, 0x41, 0x5a, 0x38, 0x39, 0x42, 0x38, + 0x40, 0x39, 0x43, 0x50, 0x3a, 0x3a, 0x32, 0x3c, 0x3c, 0x35, 0x44, 0x4a, + 0x37, 0x35, 0x36, 0x3c, 0x35, 0x30, 0x48, 0x4b, 0x3c, 0x33, 0x37, 0x3e, + 0x42, 0x3c, 0x42, 0x4e, 0x41, 0x32, 0x3e, 0x33, 0x49, 
0x39, 0x3e, 0x42, + 0x3d, 0x39, 0x37, 0x36, 0x35, 0x41, 0x3e, 0x37, 0x37, 0x3e, 0x3d, 0x38, + 0x3a, 0x3c, 0x41, 0x29, 0x3c, 0x3b, 0x39, 0x40, 0x43, 0x3d, 0x3e, 0x33, + 0x3f, 0x3f, 0x3e, 0x43, 0x43, 0x38, 0x38, 0x41, 0x3b, 0x38, 0x35, 0x3a, + 0x4b, 0x44, 0x44, 0x55, 0x4e, 0x44, 0x4d, 0x49, 0x3e, 0x53, 0x45, 0x3f, + 0x45, 0x3d, 0x36, 0x36, 0x4f, 0x5b, 0x6b, 0x28, 0x59, 0x34, 0x39, 0x34, + 0x4f, 0x4d, 0x52, 0x3e, 0x51, 0x34, 0x35, 0x4a, 0x3b, 0x3f, 0x21, 0x45, + 0x36, 0x3f, 0x38, 0x33, 0x2c, 0x37, 0x32, 0x2f, 0x2b, 0x44, 0x47, 0x3f, + 0x38, 0x3a, 0x3f, 0x2e, 0x41, 0x3f, 0x3d, 0x41, 0x35, 0x48, 0x43, 0x40, + 0x33, 0x44, 0x40, 0x38, 0x47, 0x44, 0x4c, 0x3d, 0x41, 0x3b, 0x39, 0x36, + 0x3e, 0x44, 0x49, 0x48, 0x3c, 0x3b, 0x34, 0x34, 0x3f, 0x3c, 0x42, 0x52, + 0x43, 0x41, 0x3c, 0x3c, 0x3d, 0x43, 0x48, 0x54, 0x39, 0x35, 0x39, 0x3c, + 0x43, 0x3c, 0x44, 0x5f, 0x39, 0x3d, 0x38, 0x3f, 0x36, 0x3d, 0x43, 0x58, + 0x33, 0x3d, 0x43, 0x33, 0x3f, 0x36, 0x39, 0x54, 0x3a, 0x37, 0x2d, 0x46, + 0x43, 0x41, 0x47, 0x46, 0x3e, 0x42, 0x34, 0x49, 0x3a, 0x3f, 0x38, 0x50, + 0x3a, 0x3b, 0x42, 0x3a, 0x3e, 0x3c, 0x3b, 0x40, 0x42, 0x45, 0x37, 0x3b, + 0x2f, 0x3b, 0x46, 0x30, 0x42, 0x3b, 0x3b, 0x44, 0x3b, 0x3e, 0x40, 0x1e, + 0x33, 0x40, 0x40, 0x3d, 0x39, 0x3a, 0x41, 0x33, 0x45, 0x3e, 0x3c, 0x3f, + 0x3f, 0x38, 0x31, 0x46, 0x3b, 0x35, 0x42, 0x39, 0x49, 0x3e, 0x3d, 0x66, + 0x53, 0x3f, 0x44, 0x40, 0x43, 0x45, 0x48, 0x45, 0x49, 0x2d, 0x3e, 0x3a, + 0x4f, 0x5a, 0x62, 0x27, 0x54, 0x37, 0x35, 0x34, 0x42, 0x50, 0x54, 0x43, + 0x4d, 0x38, 0x39, 0x48, 0x38, 0x4c, 0x21, 0x3f, 0x40, 0x3a, 0x3a, 0x2f, + 0x37, 0x2f, 0x29, 0x2c, 0x36, 0x47, 0x3f, 0x41, 0x31, 0x33, 0x3e, 0x32, + 0x3e, 0x40, 0x42, 0x40, 0x42, 0x3a, 0x46, 0x33, 0x44, 0x40, 0x3c, 0x43, + 0x3d, 0x41, 0x4d, 0x3d, 0x3c, 0x47, 0x46, 0x43, 0x42, 0x3e, 0x44, 0x4e, + 0x41, 0x3a, 0x44, 0x38, 0x45, 0x3b, 0x49, 0x4c, 0x40, 0x3f, 0x37, 0x3e, + 0x3e, 0x46, 0x41, 0x51, 0x3f, 0x39, 0x30, 0x40, 0x3e, 0x38, 0x43, 0x5b, + 0x33, 0x3e, 0x31, 0x42, 0x3d, 0x2f, 0x49, 0x57, 0x37, 
0x31, 0x46, 0x44, + 0x3e, 0x35, 0x40, 0x55, 0x36, 0x35, 0x3d, 0x3c, 0x38, 0x33, 0x42, 0x52, + 0x3b, 0x39, 0x34, 0x31, 0x45, 0x34, 0x3c, 0x51, 0x33, 0x39, 0x3c, 0x40, + 0x36, 0x36, 0x42, 0x3e, 0x37, 0x3e, 0x3b, 0x40, 0x3d, 0x36, 0x41, 0x30, + 0x42, 0x45, 0x40, 0x49, 0x3d, 0x32, 0x46, 0x26, 0x40, 0x44, 0x3a, 0x3f, + 0x3d, 0x46, 0x45, 0x31, 0x33, 0x34, 0x3e, 0x37, 0x46, 0x3b, 0x32, 0x3a, + 0x3d, 0x31, 0x3c, 0x36, 0x50, 0x41, 0x3b, 0x5d, 0x53, 0x42, 0x44, 0x37, + 0x3e, 0x4d, 0x41, 0x4b, 0x49, 0x2f, 0x35, 0x3a, 0x4e, 0x59, 0x5d, 0x27, + 0x5c, 0x30, 0x3d, 0x3a, 0x46, 0x50, 0x57, 0x4a, 0x4c, 0x36, 0x37, 0x46, + 0x48, 0x41, 0x24, 0x49, 0x36, 0x3e, 0x41, 0x45, 0x37, 0x38, 0x2e, 0x2e, + 0x34, 0x3c, 0x38, 0x41, 0x36, 0x3d, 0x43, 0x36, 0x3e, 0x3e, 0x41, 0x3b, + 0x42, 0x3c, 0x43, 0x38, 0x3e, 0x3d, 0x41, 0x48, 0x47, 0x4c, 0x45, 0x3b, + 0x37, 0x41, 0x38, 0x41, 0x3d, 0x41, 0x46, 0x4e, 0x36, 0x45, 0x38, 0x39, + 0x42, 0x42, 0x37, 0x4c, 0x34, 0x46, 0x3c, 0x44, 0x4a, 0x39, 0x45, 0x53, + 0x3c, 0x3f, 0x41, 0x35, 0x3c, 0x45, 0x4c, 0x5a, 0x44, 0x41, 0x30, 0x35, + 0x40, 0x39, 0x42, 0x5a, 0x36, 0x36, 0x3a, 0x3b, 0x43, 0x35, 0x3c, 0x56, + 0x35, 0x38, 0x2b, 0x4a, 0x3c, 0x40, 0x45, 0x54, 0x37, 0x37, 0x3a, 0x44, + 0x42, 0x3b, 0x3d, 0x4a, 0x3f, 0x37, 0x3b, 0x35, 0x34, 0x3f, 0x40, 0x48, + 0x45, 0x3e, 0x37, 0x38, 0x41, 0x41, 0x3d, 0x37, 0x43, 0x3d, 0x3d, 0x45, + 0x3a, 0x38, 0x3f, 0x23, 0x4a, 0x37, 0x42, 0x3c, 0x3f, 0x43, 0x42, 0x33, + 0x37, 0x39, 0x35, 0x3b, 0x41, 0x36, 0x2f, 0x3b, 0x41, 0x3a, 0x44, 0x3d, + 0x3e, 0x45, 0x44, 0x50, 0x47, 0x47, 0x48, 0x3c, 0x3f, 0x45, 0x43, 0x3f, + 0x4a, 0x33, 0x3c, 0x3a, 0x52, 0x52, 0x5a, 0x23, 0x58, 0x31, 0x3b, 0x3b, + 0x47, 0x44, 0x54, 0x45, 0x42, 0x38, 0x38, 0x40, 0x43, 0x3f, 0x2a, 0x46, + 0x3b, 0x46, 0x3b, 0x46, 0x35, 0x37, 0x29, 0x35, 0x38, 0x41, 0x3a, 0x31, + 0x44, 0x41, 0x39, 0x36, 0x45, 0x41, 0x40, 0x3e, 0x40, 0x44, 0x47, 0x37, + 0x3f, 0x42, 0x49, 0x34, 0x46, 0x3d, 0x4b, 0x3d, 0x42, 0x3b, 0x42, 0x3e, + 0x41, 0x3b, 0x3f, 0x43, 0x47, 0x45, 0x47, 0x41, 0x40, 
0x3a, 0x3d, 0x45, + 0x40, 0x36, 0x3b, 0x3b, 0x44, 0x37, 0x46, 0x55, 0x35, 0x42, 0x3f, 0x3a, + 0x41, 0x41, 0x44, 0x5c, 0x31, 0x44, 0x3d, 0x46, 0x39, 0x38, 0x46, 0x59, + 0x41, 0x3b, 0x3d, 0x39, 0x33, 0x3e, 0x41, 0x58, 0x33, 0x44, 0x34, 0x31, + 0x48, 0x3e, 0x4d, 0x56, 0x36, 0x3c, 0x37, 0x46, 0x46, 0x38, 0x45, 0x53, + 0x35, 0x3d, 0x3a, 0x31, 0x42, 0x48, 0x45, 0x44, 0x3b, 0x3b, 0x3c, 0x41, + 0x3d, 0x42, 0x3f, 0x2f, 0x38, 0x3c, 0x3e, 0x41, 0x44, 0x3a, 0x4a, 0x24, + 0x37, 0x3e, 0x37, 0x48, 0x40, 0x3f, 0x46, 0x3c, 0x47, 0x4a, 0x38, 0x47, + 0x34, 0x45, 0x31, 0x42, 0x43, 0x44, 0x3f, 0x3f, 0x49, 0x40, 0x3c, 0x41, + 0x4d, 0x43, 0x42, 0x39, 0x39, 0x48, 0x41, 0x38, 0x47, 0x3c, 0x3c, 0x42, + 0x44, 0x55, 0x62, 0x2a, 0x5c, 0x32, 0x3a, 0x37, 0x4c, 0x44, 0x4f, 0x3e, + 0x4e, 0x42, 0x3a, 0x42, 0x41, 0x4a, 0x35, 0x44, 0x45, 0x3b, 0x43, 0x41, + 0x33, 0x38, 0x28, 0x36, 0x40, 0x47, 0x3e, 0x3e, 0x3e, 0x39, 0x3a, 0x37, + 0x44, 0x44, 0x3f, 0x3b, 0x41, 0x3c, 0x45, 0x36, 0x38, 0x3a, 0x3c, 0x42, + 0x42, 0x3f, 0x59, 0x3c, 0x47, 0x3d, 0x38, 0x3a, 0x42, 0x44, 0x41, 0x46, + 0x3f, 0x43, 0x48, 0x42, 0x44, 0x35, 0x3f, 0x45, 0x36, 0x3f, 0x38, 0x3a, + 0x44, 0x3d, 0x3d, 0x4e, 0x3e, 0x45, 0x40, 0x42, 0x3c, 0x33, 0x43, 0x5a, + 0x38, 0x3e, 0x45, 0x3a, 0x3e, 0x42, 0x45, 0x52, 0x3c, 0x42, 0x3a, 0x38, + 0x3d, 0x3b, 0x4a, 0x57, 0x38, 0x37, 0x47, 0x44, 0x3e, 0x3c, 0x38, 0x48, + 0x36, 0x41, 0x3f, 0x41, 0x3a, 0x3a, 0x46, 0x47, 0x42, 0x40, 0x32, 0x33, + 0x43, 0x37, 0x41, 0x43, 0x3e, 0x40, 0x3d, 0x3a, 0x3e, 0x38, 0x42, 0x30, + 0x3e, 0x40, 0x46, 0x42, 0x40, 0x44, 0x42, 0x23, 0x31, 0x40, 0x3f, 0x3d, + 0x3b, 0x33, 0x40, 0x33, 0x41, 0x33, 0x43, 0x41, 0x3a, 0x3e, 0x36, 0x40, + 0x40, 0x45, 0x37, 0x42, 0x46, 0x42, 0x39, 0x48, 0x44, 0x40, 0x40, 0x45, + 0x3c, 0x49, 0x41, 0x3f, 0x4c, 0x3d, 0x2f, 0x3f, 0x47, 0x52, 0x54, 0x2c, + 0x55, 0x42, 0x44, 0x3b, 0x46, 0x4f, 0x48, 0x3c, 0x45, 0x39, 0x3f, 0x4b, + 0x3f, 0x3f, 0x36, 0x42, 0x41, 0x48, 0x44, 0x44, 0x36, 0x3b, 0x37, 0x40, + 0x39, 0x49, 0x3a, 0x35, 0x3e, 0x48, 0x31, 0x30, 0x44, 
0x38, 0x4c, 0x3c, + 0x41, 0x3e, 0x46, 0x32, 0x44, 0x3b, 0x42, 0x3c, 0x38, 0x3a, 0x47, 0x3f, + 0x3a, 0x42, 0x3a, 0x43, 0x40, 0x4b, 0x47, 0x3c, 0x42, 0x46, 0x45, 0x42, + 0x3c, 0x46, 0x3d, 0x3f, 0x3e, 0x36, 0x38, 0x3e, 0x46, 0x3c, 0x4d, 0x43, + 0x49, 0x41, 0x48, 0x3c, 0x3d, 0x39, 0x43, 0x58, 0x3a, 0x41, 0x3f, 0x38, + 0x37, 0x3f, 0x46, 0x5d, 0x3c, 0x3c, 0x39, 0x36, 0x3d, 0x46, 0x43, 0x50, + 0x3a, 0x47, 0x39, 0x36, 0x41, 0x3f, 0x3e, 0x51, 0x31, 0x36, 0x3e, 0x3c, + 0x3c, 0x3a, 0x48, 0x41, 0x3a, 0x43, 0x49, 0x3e, 0x42, 0x46, 0x3f, 0x41, + 0x49, 0x33, 0x42, 0x41, 0x45, 0x40, 0x3d, 0x2b, 0x3d, 0x38, 0x40, 0x37, + 0x3a, 0x31, 0x45, 0x26, 0x33, 0x3d, 0x3f, 0x39, 0x36, 0x3c, 0x38, 0x33, + 0x34, 0x3f, 0x35, 0x44, 0x3a, 0x39, 0x32, 0x41, 0x35, 0x40, 0x3c, 0x3b, + 0x4a, 0x3f, 0x3e, 0x3e, 0x4a, 0x3e, 0x42, 0x35, 0x38, 0x43, 0x3c, 0x37, + 0x3d, 0x3c, 0x39, 0x43, 0x3f, 0x4e, 0x54, 0x33, 0x4b, 0x37, 0x43, 0x3b, + 0x43, 0x48, 0x43, 0x42, 0x3d, 0x46, 0x45, 0x49, 0x3a, 0x39, 0x36, 0x4a, + 0x48, 0x48, 0x37, 0x4b, 0x42, 0x47, 0x34, 0x34, 0x43, 0x42, 0x3a, 0x3d, + 0x3c, 0x46, 0x34, 0x39, 0x40, 0x3b, 0x3e, 0x3e, 0x37, 0x3d, 0x53, 0x3b, + 0x48, 0x3c, 0x43, 0x44, 0x3b, 0x3f, 0x57, 0x3d, 0x39, 0x3c, 0x39, 0x3a, + 0x3e, 0x3f, 0x43, 0x3e, 0x41, 0x47, 0x3c, 0x41, 0x40, 0x41, 0x37, 0x3f, + 0x3b, 0x43, 0x35, 0x3e, 0x45, 0x40, 0x47, 0x59, 0x41, 0x49, 0x3b, 0x3f, + 0x47, 0x49, 0x4b, 0x61, 0x39, 0x48, 0x39, 0x3e, 0x44, 0x34, 0x3b, 0x59, + 0x3c, 0x42, 0x45, 0x35, 0x42, 0x41, 0x39, 0x52, 0x42, 0x3c, 0x3d, 0x3e, + 0x3d, 0x4a, 0x4a, 0x4d, 0x3c, 0x34, 0x44, 0x3c, 0x41, 0x34, 0x38, 0x46, + 0x38, 0x45, 0x40, 0x45, 0x40, 0x3a, 0x3d, 0x44, 0x3a, 0x37, 0x3a, 0x3a, + 0x3b, 0x42, 0x40, 0x34, 0x3b, 0x3c, 0x42, 0x40, 0x3d, 0x32, 0x40, 0x27, + 0x37, 0x39, 0x37, 0x46, 0x48, 0x31, 0x40, 0x30, 0x42, 0x42, 0x3a, 0x40, + 0x3d, 0x37, 0x2a, 0x40, 0x41, 0x37, 0x3c, 0x4a, 0x46, 0x45, 0x3d, 0x34, + 0x48, 0x41, 0x42, 0x3e, 0x3f, 0x39, 0x3c, 0x3a, 0x4f, 0x3b, 0x32, 0x3e, + 0x43, 0x51, 0x4f, 0x2a, 0x46, 0x3a, 0x3d, 0x3b, 0x40, 
0x3d, 0x4c, 0x3c, + 0x48, 0x40, 0x36, 0x4a, 0x3a, 0x38, 0x42, 0x43, 0x4c, 0x3d, 0x47, 0x47, + 0x33, 0x3f, 0x2d, 0x37, 0x4a, 0x43, 0x38, 0x3e, 0x49, 0x42, 0x42, 0x3d, + 0x43, 0x47, 0x41, 0x38, 0x46, 0x37, 0x46, 0x38, 0x47, 0x42, 0x49, 0x3d, + 0x3b, 0x37, 0x4c, 0x3c, 0x3a, 0x45, 0x3f, 0x37, 0x36, 0x3d, 0x3c, 0x40, + 0x3e, 0x45, 0x46, 0x41, 0x41, 0x40, 0x3c, 0x44, 0x47, 0x43, 0x37, 0x3f, + 0x3e, 0x3a, 0x3a, 0x4b, 0x3a, 0x36, 0x3d, 0x3f, 0x38, 0x3f, 0x3c, 0x58, + 0x40, 0x49, 0x3d, 0x42, 0x38, 0x3a, 0x47, 0x50, 0x3b, 0x49, 0x40, 0x44, + 0x3e, 0x3c, 0x38, 0x52, 0x3a, 0x3e, 0x44, 0x3c, 0x35, 0x44, 0x3a, 0x47, + 0x3e, 0x49, 0x3f, 0x47, 0x45, 0x39, 0x3b, 0x46, 0x44, 0x3e, 0x41, 0x46, + 0x40, 0x41, 0x40, 0x40, 0x3a, 0x35, 0x3e, 0x36, 0x3e, 0x3e, 0x3d, 0x35, + 0x3b, 0x3c, 0x38, 0x46, 0x3b, 0x3c, 0x41, 0x2c, 0x3f, 0x42, 0x38, 0x3b, + 0x36, 0x3b, 0x39, 0x40, 0x40, 0x38, 0x36, 0x33, 0x34, 0x42, 0x2f, 0x44, + 0x41, 0x40, 0x39, 0x35, 0x3b, 0x44, 0x42, 0x2c, 0x41, 0x3b, 0x44, 0x41, + 0x35, 0x44, 0x3b, 0x34, 0x44, 0x49, 0x36, 0x39, 0x3a, 0x52, 0x4d, 0x2b, + 0x40, 0x40, 0x3e, 0x39, 0x48, 0x42, 0x3c, 0x44, 0x46, 0x49, 0x3f, 0x54, + 0x43, 0x40, 0x2e, 0x40, 0x4f, 0x36, 0x3e, 0x3f, 0x38, 0x48, 0x44, 0x3c, + 0x44, 0x43, 0x41, 0x47, 0x40, 0x46, 0x40, 0x37, 0x41, 0x34, 0x3a, 0x41, + 0x41, 0x3b, 0x49, 0x39, 0x42, 0x38, 0x3d, 0x39, 0x34, 0x35, 0x43, 0x36, + 0x3e, 0x44, 0x3f, 0x40, 0x43, 0x40, 0x40, 0x3a, 0x47, 0x42, 0x3e, 0x42, + 0x46, 0x35, 0x3a, 0x46, 0x3c, 0x3c, 0x3c, 0x3d, 0x3f, 0x40, 0x43, 0x4c, + 0x3a, 0x37, 0x3f, 0x43, 0x47, 0x38, 0x42, 0x58, 0x42, 0x3b, 0x34, 0x37, + 0x3e, 0x48, 0x3c, 0x57, 0x44, 0x3c, 0x3d, 0x3a, 0x36, 0x48, 0x3c, 0x51, + 0x3d, 0x48, 0x45, 0x45, 0x38, 0x45, 0x40, 0x3f, 0x3b, 0x35, 0x3d, 0x3f, + 0x38, 0x47, 0x39, 0x3b, 0x36, 0x49, 0x43, 0x40, 0x3f, 0x46, 0x38, 0x40, + 0x3f, 0x3e, 0x39, 0x32, 0x47, 0x42, 0x35, 0x33, 0x39, 0x47, 0x3c, 0x36, + 0x3b, 0x37, 0x43, 0x35, 0x3b, 0x3b, 0x34, 0x3b, 0x38, 0x3d, 0x3e, 0x3a, + 0x35, 0x49, 0x38, 0x40, 0x3f, 0x3f, 0x3e, 0x37, 0x43, 
0x3b, 0x3e, 0x3e, + 0x3b, 0x40, 0x44, 0x39, 0x3d, 0x3f, 0x31, 0x42, 0x42, 0x3b, 0x41, 0x3d, + 0x3e, 0x3c, 0x37, 0x34, 0x48, 0x3d, 0x49, 0x4a, 0x47, 0x36, 0x3a, 0x34, + 0x37, 0x36, 0x3e, 0x38, 0x33, 0x45, 0x39, 0x44, 0x34, 0x49, 0x3a, 0x3d, + 0x34, 0x31, 0x31, 0x3d, 0x34, 0x3d, 0x41, 0x3e, 0x49, 0x41, 0x34, 0x3f, + 0x3a, 0x42, 0x3e, 0x40, 0x3f, 0x33, 0x46, 0x3f, 0x34, 0x39, 0x37, 0x46, + 0x3e, 0x32, 0x3f, 0x45, 0x45, 0x41, 0x3b, 0x4b, 0x35, 0x35, 0x3b, 0x4a, + 0x3d, 0x43, 0x3b, 0x44, 0x3c, 0x38, 0x31, 0x43, 0x39, 0x35, 0x41, 0x45, + 0x37, 0x3e, 0x43, 0x47, 0x39, 0x40, 0x41, 0x41, 0x40, 0x32, 0x37, 0x3e, + 0x3d, 0x39, 0x3b, 0x49, 0x33, 0x35, 0x38, 0x41, 0x45, 0x37, 0x3c, 0x49, + 0x3b, 0x34, 0x34, 0x41, 0x3a, 0x3f, 0x3e, 0x47, 0x39, 0x3c, 0x34, 0x3a, + 0x38, 0x44, 0x40, 0x51, 0x3a, 0x37, 0x3b, 0x3f, 0x3d, 0x3a, 0x45, 0x48, + 0x3f, 0x46, 0x35, 0x43, 0x38, 0x43, 0x35, 0x4c, 0x42, 0x47, 0x44, 0x3d, + 0x40, 0x3a, 0x39, 0x4e, 0x3d, 0x37, 0x3c, 0x42, 0x40, 0x48, 0x44, 0x4c, + 0x31, 0x40, 0x42, 0x3b, 0x45, 0x45, 0x3f, 0x3e, 0x3d, 0x44, 0x3f, 0x31, + 0x3f, 0x44, 0x45, 0x37, 0x3e, 0x3d, 0x35, 0x3b, 0x2d, 0x44, 0x4a, 0x3a, + 0x2b, 0x37, 0x38, 0x46, 0x41, 0x39, 0x3c, 0x3c, 0x46, 0x33, 0x36, 0x3c, + 0x4b, 0x34, 0x49, 0x50, 0x30, 0x3c, 0x33, 0x41, 0x44, 0x33, 0x43, 0x39, + 0x36, 0x45, 0x33, 0x3b, 0x3d, 0x36, 0x47, 0x30, 0x42, 0x37, 0x49, 0x3e, + 0x3b, 0x49, 0x3d, 0x3b, 0x3a, 0x41, 0x38, 0x44, 0x42, 0x3b, 0x3f, 0x40, + 0x46, 0x35, 0x38, 0x3c, 0x48, 0x3a, 0x46, 0x41, 0x36, 0x36, 0x41, 0x3e, + 0x43, 0x3e, 0x32, 0x39, 0x3a, 0x41, 0x30, 0x3e, 0x40, 0x3e, 0x36, 0x3a, + 0x45, 0x45, 0x3a, 0x3c, 0x31, 0x3b, 0x47, 0x3f, 0x36, 0x3a, 0x3c, 0x41, + 0x3b, 0x41, 0x39, 0x46, 0x3f, 0x3c, 0x34, 0x3e, 0x41, 0x45, 0x41, 0x42, + 0x39, 0x40, 0x40, 0x44, 0x45, 0x42, 0x34, 0x3f, 0x3e, 0x31, 0x3b, 0x41, + 0x33, 0x43, 0x37, 0x44, 0x44, 0x3a, 0x36, 0x36, 0x48, 0x3c, 0x37, 0x47, + 0x39, 0x3e, 0x3e, 0x3c, 0x3c, 0x41, 0x3c, 0x44, 0x3b, 0x42, 0x3f, 0x3a, + 0x43, 0x3b, 0x3e, 0x48, 0x36, 0x3f, 0x3d, 0x34, 0x40, 
0x43, 0x35, 0x4f, + 0x34, 0x39, 0x3b, 0x41, 0x40, 0x39, 0x37, 0x4c, 0x39, 0x36, 0x39, 0x39, + 0x47, 0x41, 0x43, 0x3f, 0x3f, 0x33, 0x42, 0x3f, 0x42, 0x40, 0x37, 0x40, + 0x3f, 0x34, 0x45, 0x3d, 0x2d, 0x3c, 0x44, 0x3b, 0x43, 0x37, 0x26, 0x50, + 0x43, 0x44, 0x3d, 0x43, 0x42, 0x2d, 0x3c, 0x33, 0x4a, 0x32, 0x4a, 0x53, + 0x33, 0x38, 0x27, 0x36, 0x42, 0x30, 0x47, 0x3d, 0x36, 0x45, 0x46, 0x36, + 0x3b, 0x3b, 0x40, 0x33, 0x37, 0x36, 0x44, 0x46, 0x3d, 0x35, 0x40, 0x38, + 0x3b, 0x40, 0x36, 0x3c, 0x3d, 0x37, 0x31, 0x41, 0x33, 0x3c, 0x38, 0x3f, + 0x43, 0x3a, 0x40, 0x49, 0x38, 0x39, 0x38, 0x3d, 0x43, 0x3d, 0x39, 0x3b, + 0x3d, 0x3f, 0x38, 0x42, 0x34, 0x43, 0x33, 0x3e, 0x43, 0x3e, 0x40, 0x42, + 0x3b, 0x45, 0x37, 0x44, 0x43, 0x39, 0x3c, 0x3d, 0x37, 0x44, 0x3a, 0x3b, + 0x47, 0x3f, 0x3a, 0x3c, 0x3a, 0x3b, 0x3f, 0x43, 0x3e, 0x3d, 0x46, 0x3e, + 0x37, 0x36, 0x3f, 0x40, 0x42, 0x42, 0x37, 0x36, 0x48, 0x35, 0x44, 0x44, + 0x39, 0x3c, 0x3b, 0x41, 0x44, 0x49, 0x3a, 0x40, 0x41, 0x36, 0x33, 0x3a, + 0x3c, 0x3d, 0x40, 0x3f, 0x43, 0x36, 0x3c, 0x3a, 0x3f, 0x4b, 0x32, 0x49, + 0x49, 0x3e, 0x3a, 0x3e, 0x3f, 0x41, 0x3c, 0x47, 0x40, 0x41, 0x45, 0x3e, + 0x47, 0x47, 0x3f, 0x47, 0x45, 0x3e, 0x31, 0x43, 0x4a, 0x44, 0x36, 0x40, + 0x41, 0x47, 0x3e, 0x42, 0x37, 0x40, 0x3b, 0x46, 0x37, 0x41, 0x3e, 0x3c, + 0x27, 0x40, 0x49, 0x42, 0x42, 0x39, 0x30, 0x49, 0x43, 0x38, 0x3d, 0x42, + 0x43, 0x2f, 0x3b, 0x37, 0x4b, 0x2d, 0x4f, 0x52, 0x30, 0x31, 0x2f, 0x3a, + 0x49, 0x38, 0x4f, 0x45, 0x2e, 0x47, 0x3a, 0x32, 0x33, 0x3f, 0x4a, 0x2e, + 0x33, 0x3b, 0x3e, 0x3e, 0x49, 0x45, 0x44, 0x38, 0x3c, 0x35, 0x45, 0x47, + 0x41, 0x3b, 0x3c, 0x48, 0x46, 0x39, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x42, + 0x3d, 0x46, 0x33, 0x41, 0x36, 0x3f, 0x3f, 0x3c, 0x33, 0x3e, 0x3e, 0x40, + 0x44, 0x40, 0x3c, 0x38, 0x46, 0x3a, 0x40, 0x36, 0x42, 0x35, 0x3f, 0x3b, + 0x3b, 0x43, 0x3c, 0x40, 0x40, 0x49, 0x2e, 0x39, 0x40, 0x3f, 0x45, 0x41, + 0x3f, 0x30, 0x42, 0x3d, 0x40, 0x3c, 0x3a, 0x3b, 0x3b, 0x40, 0x39, 0x42, + 0x3a, 0x3f, 0x3f, 0x3e, 0x35, 0x3b, 0x38, 0x45, 0x47, 
0x35, 0x44, 0x3e, + 0x3b, 0x3f, 0x3f, 0x40, 0x3a, 0x35, 0x30, 0x49, 0x45, 0x35, 0x3b, 0x39, + 0x3b, 0x48, 0x3f, 0x37, 0x39, 0x40, 0x43, 0x45, 0x3d, 0x40, 0x41, 0x3a, + 0x33, 0x3d, 0x3a, 0x4b, 0x40, 0x42, 0x40, 0x42, 0x43, 0x39, 0x3c, 0x49, + 0x3e, 0x47, 0x3e, 0x44, 0x3f, 0x3a, 0x40, 0x41, 0x3f, 0x42, 0x42, 0x37, + 0x3e, 0x3b, 0x36, 0x3e, 0x3b, 0x3c, 0x48, 0x43, 0x2d, 0x46, 0x4a, 0x38, + 0x45, 0x3a, 0x29, 0x46, 0x40, 0x3c, 0x40, 0x44, 0x40, 0x33, 0x2f, 0x33, + 0x48, 0x2e, 0x51, 0x4f, 0x2b, 0x32, 0x2e, 0x2d, 0x45, 0x33, 0x4d, 0x41, + 0x29, 0x4b, 0x41, 0x39, 0x2f, 0x3a, 0x49, 0x31, 0x37, 0x40, 0x47, 0x4c, + 0x3e, 0x31, 0x41, 0x3f, 0x43, 0x37, 0x45, 0x4f, 0x41, 0x3c, 0x30, 0x4a, + 0x37, 0x37, 0x36, 0x39, 0x31, 0x3d, 0x36, 0x4b, 0x37, 0x44, 0x3c, 0x43, + 0x44, 0x36, 0x3f, 0x3b, 0x34, 0x3e, 0x3a, 0x35, 0x38, 0x3f, 0x33, 0x37, + 0x3b, 0x3d, 0x46, 0x38, 0x3b, 0x37, 0x38, 0x3b, 0x31, 0x3e, 0x3d, 0x3b, + 0x3d, 0x39, 0x35, 0x33, 0x33, 0x3c, 0x39, 0x39, 0x48, 0x39, 0x39, 0x3f, + 0x3e, 0x36, 0x47, 0x3a, 0x44, 0x40, 0x32, 0x3c, 0x37, 0x35, 0x40, 0x3f, + 0x3a, 0x38, 0x3b, 0x3d, 0x46, 0x45, 0x36, 0x43, 0x40, 0x3d, 0x41, 0x41, + 0x47, 0x3a, 0x3d, 0x3e, 0x43, 0x42, 0x32, 0x36, 0x41, 0x37, 0x3b, 0x35, + 0x36, 0x44, 0x36, 0x3c, 0x43, 0x32, 0x3e, 0x3e, 0x42, 0x45, 0x32, 0x3c, + 0x3a, 0x3b, 0x35, 0x43, 0x41, 0x3d, 0x44, 0x50, 0x43, 0x31, 0x3e, 0x44, + 0x44, 0x41, 0x3a, 0x44, 0x36, 0x39, 0x3b, 0x3c, 0x32, 0x38, 0x3b, 0x45, + 0x38, 0x43, 0x40, 0x42, 0x33, 0x3e, 0x4a, 0x42, 0x45, 0x39, 0x2f, 0x42, + 0x39, 0x35, 0x44, 0x3e, 0x39, 0x2f, 0x34, 0x33, 0x49, 0x29, 0x50, 0x4f, + 0x2b, 0x36, 0x34, 0x2d, 0x47, 0x33, 0x49, 0x3c, 0x33, 0x51, 0x49, 0x3f, + 0x34, 0x39, 0x4a, 0x2c, 0x34, 0x45, 0x4f, 0x47, 0x34, 0x42, 0x3a, 0x3d, + 0x36, 0x4a, 0x3b, 0x43, 0x36, 0x3f, 0x39, 0x4b, 0x38, 0x3a, 0x31, 0x3d, + 0x32, 0x42, 0x3a, 0x47, 0x48, 0x3e, 0x44, 0x3f, 0x39, 0x3e, 0x44, 0x35, + 0x41, 0x3c, 0x45, 0x3a, 0x3e, 0x3b, 0x3d, 0x2f, 0x37, 0x40, 0x3e, 0x43, + 0x39, 0x39, 0x33, 0x3b, 0x37, 0x3b, 0x37, 0x37, 0x37, 
0x39, 0x36, 0x31, + 0x39, 0x3b, 0x41, 0x39, 0x3b, 0x40, 0x36, 0x37, 0x42, 0x39, 0x3a, 0x46, + 0x3f, 0x30, 0x38, 0x39, 0x35, 0x32, 0x3e, 0x3a, 0x43, 0x43, 0x3e, 0x33, + 0x42, 0x3f, 0x41, 0x3c, 0x46, 0x34, 0x34, 0x40, 0x43, 0x37, 0x32, 0x43, + 0x3c, 0x37, 0x36, 0x33, 0x3d, 0x36, 0x3a, 0x40, 0x39, 0x38, 0x32, 0x3e, + 0x32, 0x3d, 0x37, 0x49, 0x42, 0x47, 0x41, 0x3b, 0x3d, 0x3c, 0x3a, 0x37, + 0x3c, 0x45, 0x3a, 0x45, 0x36, 0x44, 0x3a, 0x3a, 0x3a, 0x3c, 0x43, 0x3b, + 0x3b, 0x35, 0x38, 0x47, 0x36, 0x40, 0x32, 0x43, 0x3e, 0x39, 0x42, 0x40, + 0x2c, 0x3c, 0x4c, 0x4c, 0x43, 0x3b, 0x37, 0x4a, 0x3f, 0x3c, 0x45, 0x44, + 0x3f, 0x30, 0x36, 0x31, 0x4f, 0x2f, 0x5d, 0x4b, 0x34, 0x34, 0x2d, 0x2b, + 0x44, 0x31, 0x4e, 0x40, 0x2e, 0x4d, 0x48, 0x3e, 0x37, 0x2b, 0x49, 0x25, + 0x31, 0x49, 0x44, 0x49, 0x39, 0x39, 0x4b, 0x3a, 0x3a, 0x41, 0x3e, 0x42, + 0x3c, 0x36, 0x36, 0x4a, 0x32, 0x44, 0x3e, 0x48, 0x3e, 0x3c, 0x37, 0x49, + 0x3d, 0x34, 0x3f, 0x37, 0x33, 0x36, 0x46, 0x3a, 0x3a, 0x31, 0x45, 0x3f, + 0x3a, 0x31, 0x3b, 0x33, 0x41, 0x42, 0x35, 0x39, 0x38, 0x44, 0x36, 0x3a, + 0x3f, 0x3b, 0x37, 0x3e, 0x3b, 0x38, 0x2f, 0x32, 0x44, 0x3d, 0x44, 0x41, + 0x39, 0x36, 0x3a, 0x34, 0x39, 0x38, 0x34, 0x3f, 0x3b, 0x37, 0x34, 0x34, + 0x40, 0x3d, 0x34, 0x3a, 0x46, 0x42, 0x3f, 0x34, 0x38, 0x33, 0x39, 0x44, + 0x3f, 0x41, 0x3c, 0x31, 0x40, 0x32, 0x3f, 0x37, 0x37, 0x41, 0x3e, 0x35, + 0x37, 0x48, 0x3b, 0x41, 0x3d, 0x3a, 0x3f, 0x35, 0x33, 0x3c, 0x36, 0x3b, + 0x3a, 0x48, 0x33, 0x42, 0x37, 0x33, 0x39, 0x41, 0x3c, 0x3d, 0x3b, 0x4d, + 0x39, 0x3a, 0x3e, 0x44, 0x3d, 0x41, 0x3b, 0x38, 0x49, 0x41, 0x3a, 0x38, + 0x34, 0x38, 0x38, 0x3c, 0x45, 0x3c, 0x37, 0x3b, 0x36, 0x3e, 0x4a, 0x4b, + 0x42, 0x3f, 0x32, 0x45, 0x46, 0x35, 0x46, 0x41, 0x38, 0x33, 0x39, 0x37, + 0x44, 0x2b, 0x60, 0x4a, 0x2a, 0x2e, 0x35, 0x2d, 0x43, 0x37, 0x51, 0x47, + 0x2f, 0x4d, 0x50, 0x3e, 0x3a, 0x33, 0x4f, 0x2a, 0x35, 0x45, 0x4a, 0x4c, + 0x3b, 0x3d, 0x43, 0x44, 0x3d, 0x3f, 0x4a, 0x3e, 0x49, 0x37, 0x2e, 0x4f, + 0x39, 0x3f, 0x32, 0x3c, 0x37, 0x3b, 0x39, 0x4d, 0x34, 
0x3f, 0x46, 0x44, + 0x3d, 0x40, 0x3f, 0x40, 0x39, 0x33, 0x39, 0x3e, 0x3d, 0x40, 0x31, 0x30, + 0x35, 0x3d, 0x3e, 0x3a, 0x3e, 0x32, 0x31, 0x3e, 0x48, 0x3c, 0x40, 0x43, + 0x3f, 0x3f, 0x34, 0x2e, 0x3a, 0x3e, 0x3b, 0x43, 0x45, 0x32, 0x3a, 0x31, + 0x37, 0x38, 0x31, 0x35, 0x34, 0x3d, 0x42, 0x36, 0x46, 0x37, 0x32, 0x47, + 0x41, 0x3c, 0x35, 0x35, 0x36, 0x41, 0x3a, 0x3b, 0x42, 0x44, 0x36, 0x31, + 0x3c, 0x3d, 0x34, 0x34, 0x3b, 0x40, 0x40, 0x2e, 0x40, 0x46, 0x3b, 0x43, + 0x3f, 0x40, 0x3b, 0x3a, 0x32, 0x40, 0x46, 0x39, 0x3c, 0x49, 0x2f, 0x3d, + 0x49, 0x3e, 0x44, 0x3c, 0x3e, 0x35, 0x3f, 0x44, 0x41, 0x40, 0x3e, 0x47, + 0x3d, 0x40, 0x3f, 0x41, 0x3b, 0x41, 0x41, 0x3f, 0x40, 0x3f, 0x3e, 0x3e, + 0x3f, 0x43, 0x35, 0x40, 0x2b, 0x42, 0x45, 0x56, 0x40, 0x3c, 0x2f, 0x44, + 0x44, 0x3d, 0x3e, 0x3d, 0x40, 0x2d, 0x39, 0x31, 0x54, 0x2f, 0x61, 0x48, + 0x2e, 0x37, 0x37, 0x32, 0x3e, 0x2d, 0x52, 0x4d, 0x2d, 0x4d, 0x4c, 0x3a, + 0x3a, 0x31, 0x4e, 0x2d, 0x31, 0x48, 0x47, 0x54, 0x45, 0x38, 0x3b, 0x3d, + 0x42, 0x41, 0x44, 0x4a, 0x48, 0x42, 0x2f, 0x4d, 0x31, 0x34, 0x3a, 0x46, + 0x37, 0x44, 0x2c, 0x45, 0x46, 0x43, 0x40, 0x3f, 0x34, 0x33, 0x40, 0x39, + 0x32, 0x35, 0x3a, 0x40, 0x3f, 0x3f, 0x36, 0x32, 0x3f, 0x3d, 0x35, 0x48, + 0x3c, 0x48, 0x37, 0x39, 0x35, 0x3f, 0x37, 0x3d, 0x44, 0x46, 0x2d, 0x2a, + 0x47, 0x38, 0x3a, 0x39, 0x45, 0x3b, 0x40, 0x2d, 0x37, 0x33, 0x41, 0x3c, + 0x40, 0x35, 0x3f, 0x32, 0x3a, 0x36, 0x40, 0x41, 0x3a, 0x3c, 0x33, 0x31, + 0x42, 0x3f, 0x41, 0x3a, 0x41, 0x46, 0x38, 0x2f, 0x3c, 0x3d, 0x3d, 0x39, + 0x3b, 0x46, 0x41, 0x31, 0x46, 0x36, 0x40, 0x48, 0x3c, 0x33, 0x42, 0x32, + 0x3b, 0x40, 0x3f, 0x36, 0x37, 0x44, 0x34, 0x35, 0x32, 0x32, 0x37, 0x38, + 0x33, 0x3b, 0x37, 0x4a, 0x3f, 0x46, 0x3a, 0x41, 0x32, 0x37, 0x30, 0x3e, + 0x40, 0x35, 0x41, 0x40, 0x37, 0x41, 0x2b, 0x40, 0x3d, 0x3d, 0x32, 0x38, + 0x34, 0x3e, 0x47, 0x61, 0x43, 0x3b, 0x3c, 0x42, 0x46, 0x3d, 0x40, 0x4a, + 0x3c, 0x2d, 0x33, 0x35, 0x55, 0x38, 0x69, 0x4f, 0x33, 0x37, 0x30, 0x39, + 0x44, 0x2e, 0x58, 0x4b, 0x2a, 0x51, 0x4b, 0x3c, 0x39, 
0x2e, 0x51, 0x2d, + 0x30, 0x4a, 0x42, 0x53, 0x3f, 0x39, 0x3e, 0x44, 0x3b, 0x40, 0x47, 0x44, + 0x47, 0x3e, 0x39, 0x4b, 0x40, 0x3d, 0x42, 0x39, 0x3b, 0x39, 0x32, 0x42, + 0x36, 0x36, 0x36, 0x42, 0x44, 0x34, 0x33, 0x40, 0x40, 0x40, 0x3a, 0x3a, + 0x41, 0x3f, 0x31, 0x30, 0x3f, 0x31, 0x30, 0x39, 0x46, 0x36, 0x35, 0x34, + 0x40, 0x43, 0x3c, 0x41, 0x31, 0x46, 0x35, 0x26, 0x44, 0x32, 0x3d, 0x35, + 0x3d, 0x3c, 0x36, 0x32, 0x39, 0x3a, 0x30, 0x40, 0x48, 0x3e, 0x38, 0x37, + 0x44, 0x3b, 0x3d, 0x42, 0x3d, 0x3c, 0x32, 0x2b, 0x3f, 0x41, 0x39, 0x3d, + 0x3e, 0x3f, 0x35, 0x2f, 0x46, 0x3d, 0x3d, 0x3b, 0x45, 0x37, 0x31, 0x35, + 0x44, 0x40, 0x3a, 0x45, 0x3a, 0x3c, 0x39, 0x31, 0x3b, 0x3d, 0x3b, 0x3a, + 0x43, 0x44, 0x39, 0x47, 0x44, 0x36, 0x3e, 0x39, 0x48, 0x3f, 0x39, 0x4b, + 0x3c, 0x36, 0x3d, 0x44, 0x44, 0x3f, 0x39, 0x43, 0x3f, 0x37, 0x3f, 0x37, + 0x3b, 0x3b, 0x38, 0x3b, 0x3f, 0x40, 0x31, 0x44, 0x30, 0x44, 0x46, 0x5b, + 0x46, 0x3f, 0x39, 0x40, 0x40, 0x37, 0x4a, 0x46, 0x3f, 0x36, 0x40, 0x39, + 0x59, 0x3e, 0x66, 0x57, 0x32, 0x34, 0x2e, 0x33, 0x46, 0x31, 0x58, 0x44, + 0x26, 0x4c, 0x4b, 0x3c, 0x39, 0x2e, 0x4d, 0x35, 0x32, 0x46, 0x52, 0x52, + 0x3e, 0x40, 0x39, 0x3c, 0x39, 0x3d, 0x53, 0x48, 0x41, 0x3c, 0x3b, 0x4d, + 0x3c, 0x3e, 0x38, 0x44, 0x3a, 0x3a, 0x29, 0x4a, 0x3c, 0x37, 0x36, 0x38, + 0x3a, 0x31, 0x37, 0x39, 0x3a, 0x40, 0x46, 0x32, 0x42, 0x38, 0x32, 0x2e, + 0x3a, 0x45, 0x44, 0x34, 0x34, 0x38, 0x32, 0x2e, 0x35, 0x40, 0x3a, 0x41, + 0x42, 0x3d, 0x37, 0x2c, 0x3f, 0x37, 0x3c, 0x3d, 0x3a, 0x36, 0x33, 0x35, + 0x3c, 0x34, 0x3c, 0x39, 0x3c, 0x3a, 0x37, 0x30, 0x30, 0x3e, 0x3d, 0x3a, + 0x44, 0x37, 0x36, 0x32, 0x36, 0x37, 0x36, 0x3a, 0x3c, 0x41, 0x3a, 0x35, + 0x36, 0x3a, 0x34, 0x40, 0x39, 0x40, 0x3e, 0x32, 0x34, 0x46, 0x33, 0x3f, + 0x36, 0x45, 0x3e, 0x35, 0x3f, 0x38, 0x3f, 0x3e, 0x3b, 0x3a, 0x36, 0x3b, + 0x36, 0x38, 0x32, 0x3f, 0x44, 0x3c, 0x35, 0x48, 0x38, 0x39, 0x31, 0x49, + 0x3d, 0x43, 0x36, 0x3f, 0x31, 0x43, 0x36, 0x3e, 0x3e, 0x41, 0x39, 0x3b, + 0x40, 0x42, 0x3c, 0x43, 0x36, 0x4a, 0x48, 0x67, 0x4e, 
0x43, 0x36, 0x46, + 0x44, 0x3f, 0x4b, 0x4b, 0x3f, 0x38, 0x3c, 0x3c, 0x5e, 0x38, 0x70, 0x52, + 0x38, 0x32, 0x3b, 0x36, 0x4a, 0x2c, 0x52, 0x46, 0x29, 0x4f, 0x48, 0x42, + 0x2d, 0x2e, 0x4f, 0x28, 0x28, 0x45, 0x4d, 0x52, 0x42, 0x3e, 0x3f, 0x41, + 0x3c, 0x3a, 0x47, 0x50, 0x44, 0x45, 0x33, 0x4b, 0x3e, 0x3f, 0x42, 0x3d, + 0x43, 0x34, 0x27, 0x3f, 0x42, 0x3e, 0x43, 0x3e, 0x3a, 0x3c, 0x37, 0x3b, + 0x3f, 0x30, 0x3a, 0x3e, 0x3c, 0x34, 0x37, 0x24, 0x3d, 0x43, 0x40, 0x44, + 0x40, 0x46, 0x31, 0x2f, 0x43, 0x38, 0x38, 0x39, 0x3c, 0x34, 0x2d, 0x2a, + 0x38, 0x31, 0x43, 0x3b, 0x39, 0x3b, 0x32, 0x34, 0x3e, 0x39, 0x41, 0x3b, + 0x3e, 0x33, 0x3a, 0x2a, 0x41, 0x3f, 0x3c, 0x43, 0x3b, 0x3e, 0x35, 0x2c, + 0x38, 0x41, 0x33, 0x31, 0x3e, 0x3f, 0x3a, 0x3c, 0x3b, 0x35, 0x3f, 0x3d, + 0x42, 0x3a, 0x3c, 0x35, 0x3f, 0x40, 0x3c, 0x3e, 0x37, 0x41, 0x3d, 0x38, + 0x34, 0x31, 0x36, 0x3d, 0x3d, 0x47, 0x36, 0x44, 0x3f, 0x45, 0x3c, 0x3c, + 0x35, 0x36, 0x31, 0x4f, 0x46, 0x3a, 0x41, 0x42, 0x40, 0x32, 0x33, 0x41, + 0x34, 0x40, 0x3d, 0x43, 0x3b, 0x3a, 0x32, 0x3c, 0x42, 0x42, 0x3d, 0x43, + 0x37, 0x45, 0x45, 0xff, 0x4b, 0x45, 0x3b, 0x40, 0x43, 0x3e, 0x47, 0x49, + 0x3d, 0x3b, 0x3e, 0x33, 0x58, 0x35, 0x71, 0x54, 0x2f, 0x38, 0x38, 0x33, + 0x47, 0x35, 0x5b, 0x46, 0x2c, 0x4c, 0x43, 0x37, 0x36, 0x39, 0x4f, 0x30, + 0x26, 0x48, 0x51, 0x48, 0x46, 0x45, 0x3b, 0x39, 0x42, 0x50, 0x47, 0x4c, + 0x4b, 0x3b, 0x3d, 0x4d, 0x41, 0x34, 0x40, 0x44, 0x38, 0x32, 0x2d, 0x43, + 0x39, 0x36, 0x3b, 0x3b, 0x40, 0x3d, 0x37, 0x3c, 0x44, 0x39, 0x42, 0x37, + 0x38, 0x38, 0x32, 0x2f, 0x41, 0x40, 0x3f, 0x3a, 0x37, 0x35, 0x3b, 0x2a, + 0x37, 0x30, 0x3c, 0x37, 0x40, 0x38, 0x3a, 0x27, 0x44, 0x3d, 0x43, 0x40, + 0x35, 0x3f, 0x3e, 0x32, 0x3e, 0x3c, 0x40, 0x39, 0x39, 0x3a, 0x41, 0x31, + 0x3b, 0x3f, 0x34, 0x43, 0x3a, 0x38, 0x42, 0x2a, 0x47, 0x46, 0x3b, 0x38, + 0x47, 0x45, 0x39, 0x31, 0x43, 0x40, 0x37, 0x3a, 0x3d, 0x3e, 0x39, 0x30, + 0x36, 0x37, 0x3a, 0x43, 0x3f, 0x32, 0x31, 0x41, 0x45, 0x3e, 0x43, 0x38, + 0x3f, 0x37, 0x3c, 0x49, 0x3b, 0x33, 0x3d, 0x3a, 0x37, 
0x44, 0x32, 0x50, + 0x39, 0x44, 0x3e, 0x3f, 0x3d, 0x41, 0x3e, 0x3e, 0x42, 0x44, 0x45, 0x3f, + 0x36, 0x3f, 0x37, 0x39, 0x3b, 0x3d, 0x3b, 0x3b, 0x2f, 0x46, 0x40, 0x6d, + 0x50, 0x45, 0x3b, 0x45, 0x46, 0x3b, 0x42, 0x48, 0x42, 0x3c, 0x39, 0x37, + 0x57, 0x3b, 0x6c, 0x5b, 0x32, 0x35, 0x3d, 0x39, 0x48, 0x31, 0x5c, 0x46, + 0x29, 0x4c, 0x3f, 0x3e, 0x37, 0x33, 0x58, 0x32, 0x2a, 0x43, 0x4c, 0x50, + 0x3b, 0x44, 0x3c, 0x41, 0x39, 0x48, 0x55, 0x4c, 0x42, 0x38, 0x3b, 0x51, + 0x3f, 0x38, 0x44, 0x46, 0x36, 0x3b, 0x38, 0x4a, 0x3f, 0x37, 0x36, 0x3c, + 0x31, 0x3d, 0x32, 0x39, 0x3b, 0x3f, 0x3e, 0x35, 0x38, 0x3f, 0x34, 0x2b, + 0x37, 0x36, 0x39, 0x40, 0x37, 0x41, 0x32, 0x27, 0x36, 0x33, 0x40, 0x3a, + 0x3f, 0x44, 0x3f, 0x25, 0x38, 0x34, 0x42, 0x3c, 0x3a, 0x40, 0x38, 0x31, + 0x49, 0x3e, 0x33, 0x3d, 0x31, 0x36, 0x39, 0x2b, 0x44, 0x2f, 0x43, 0x34, + 0x34, 0x37, 0x39, 0x33, 0x3b, 0x34, 0x42, 0x3c, 0x40, 0x45, 0x36, 0x31, + 0x43, 0x47, 0x3e, 0x3f, 0x40, 0x3a, 0x33, 0x34, 0x41, 0x44, 0x3a, 0x43, + 0x3e, 0x38, 0x36, 0x31, 0x42, 0x44, 0x40, 0x41, 0x44, 0x43, 0x33, 0x42, + 0x3d, 0x41, 0x3d, 0x3e, 0x3c, 0x39, 0x3e, 0x4f, 0x3f, 0x37, 0x31, 0x40, + 0x3b, 0x38, 0x35, 0x3b, 0x44, 0x41, 0x41, 0x37, 0x40, 0x42, 0x2d, 0x3d, + 0x39, 0x48, 0x44, 0x3e, 0x34, 0x48, 0x49, 0x6d, 0x45, 0x4b, 0x3a, 0x44, + 0x49, 0x40, 0x4d, 0x51, 0x3f, 0x34, 0x3b, 0x40, 0x52, 0x34, 0x6f, 0x56, + 0x33, 0x3e, 0x40, 0x39, 0x41, 0x32, 0x5d, 0x45, 0x2e, 0x51, 0x48, 0x3c, + 0x2e, 0x2e, 0x51, 0x39, 0x32, 0x45, 0x4a, 0x4c, 0x3b, 0x40, 0x40, 0x3b, + 0x36, 0x41, 0x54, 0x4e, 0x4a, 0x49, 0x3b, 0x4d, 0x3c, 0x41, 0x38, 0x47, + 0x3d, 0x3c, 0x37, 0x48, 0x3f, 0x42, 0x3e, 0x36, 0x39, 0x46, 0x37, 0x3e, + 0x3b, 0x38, 0x40, 0x3b, 0x39, 0x32, 0x3e, 0x29, 0x37, 0x35, 0x3c, 0x3d, + 0x37, 0x3b, 0x35, 0x2f, 0x32, 0x3b, 0x37, 0x3c, 0x40, 0x3e, 0x39, 0x27, + 0x3b, 0x38, 0x37, 0x36, 0x39, 0x37, 0x37, 0x35, 0x42, 0x3e, 0x3b, 0x43, + 0x41, 0x3c, 0x37, 0x2a, 0x3a, 0x3e, 0x38, 0x40, 0x36, 0x3e, 0x44, 0x2e, + 0x3e, 0x3a, 0x37, 0x3b, 0x3e, 0x41, 0x3d, 0x30, 0x3b, 
0x3f, 0x41, 0x45, + 0x3a, 0x48, 0x37, 0x2f, 0x3a, 0x37, 0x34, 0x43, 0x42, 0x3d, 0x38, 0x41, + 0x3b, 0x3c, 0x39, 0x3c, 0x39, 0x47, 0x2e, 0x41, 0x42, 0x40, 0x32, 0x36, + 0x43, 0x40, 0x3d, 0x4c, 0x38, 0x3e, 0x3b, 0x41, 0x3d, 0x3b, 0x34, 0x43, + 0x43, 0x3f, 0x44, 0x3c, 0x3a, 0x33, 0x39, 0x42, 0x43, 0x3f, 0x33, 0x3d, + 0x33, 0x3e, 0x48, 0x6b, 0x48, 0x43, 0x36, 0x47, 0x49, 0x44, 0x4a, 0x49, + 0x3c, 0x31, 0x35, 0x3e, 0x5c, 0x34, 0x73, 0x53, 0x33, 0x3c, 0x32, 0x3b, + 0x43, 0x27, 0x59, 0x4e, 0x2b, 0x51, 0x4f, 0x37, 0x36, 0x34, 0x56, 0x34, + 0x32, 0x4f, 0x46, 0x50, 0x40, 0x40, 0x3c, 0x3e, 0x34, 0x37, 0x50, 0x49, + 0x43, 0x47, 0x3e, 0x52, 0x44, 0x38, 0x3b, 0x4f, 0x3a, 0x3d, 0x2b, 0x4c, + 0x40, 0x38, 0x3a, 0x35, 0x3a, 0x3a, 0x3d, 0x38, 0x3d, 0x3b, 0x37, 0x48, + 0x3d, 0x3d, 0x32, 0x30, 0x3a, 0x34, 0x3f, 0x3a, 0x3b, 0x3e, 0x35, 0x2f, + 0x3b, 0x3a, 0x45, 0x3d, 0x42, 0x33, 0x33, 0x24, 0x44, 0x39, 0x3c, 0x3d, + 0x41, 0x3c, 0x37, 0x2c, 0x3b, 0x36, 0x34, 0x41, 0x3d, 0x3f, 0x39, 0x32, + 0x3c, 0x40, 0x44, 0x3d, 0x41, 0x3d, 0x3a, 0x29, 0x3e, 0x3e, 0x43, 0x33, + 0x3f, 0x3e, 0x3e, 0x31, 0x38, 0x3a, 0x34, 0x3d, 0x3f, 0x3e, 0x3a, 0x3d, + 0x3e, 0x48, 0x45, 0x3d, 0x44, 0x37, 0x33, 0x3d, 0x45, 0x39, 0x40, 0x40, + 0x42, 0x3f, 0x3f, 0x3d, 0x3a, 0x3b, 0x41, 0x33, 0x41, 0x3c, 0x32, 0x55, + 0x43, 0x3a, 0x32, 0x40, 0x3c, 0x3e, 0x40, 0x43, 0x37, 0x3f, 0x40, 0x38, + 0x43, 0x41, 0x36, 0x42, 0x44, 0x3c, 0x32, 0x3f, 0x38, 0x42, 0x46, 0x59, + 0x4c, 0x41, 0x39, 0x47, 0x46, 0x46, 0x44, 0x44, 0x35, 0x42, 0x32, 0x39, + 0x4f, 0x34, 0x6d, 0x55, 0x31, 0x3b, 0x3a, 0x3f, 0x44, 0x2c, 0x5d, 0x43, + 0x26, 0x4a, 0x4f, 0x40, 0x36, 0x32, 0x4d, 0x33, 0x2f, 0x50, 0x4d, 0x57, + 0x3b, 0x40, 0x42, 0x44, 0x41, 0x3f, 0x52, 0x4e, 0x35, 0x41, 0x44, 0x52, + 0x40, 0x35, 0x39, 0x4b, 0x45, 0x34, 0x2c, 0x4a, 0x3b, 0x41, 0x31, 0x33, + 0x3f, 0x3a, 0x36, 0x3c, 0x3c, 0x33, 0x30, 0x38, 0x43, 0x3f, 0x32, 0x2d, + 0x3f, 0x3a, 0x38, 0x41, 0x39, 0x45, 0x36, 0x2e, 0x3c, 0x38, 0x45, 0x3f, + 0x40, 0x3f, 0x3e, 0x26, 0x41, 0x37, 0x3c, 0x44, 0x3f, 
0x3f, 0x35, 0x37, + 0x46, 0x34, 0x37, 0x3e, 0x48, 0x38, 0x36, 0x34, 0x33, 0x39, 0x40, 0x3c, + 0x42, 0x3d, 0x3b, 0x31, 0x38, 0x3b, 0x44, 0x42, 0x45, 0x38, 0x41, 0x30, + 0x3d, 0x42, 0x36, 0x3f, 0x3b, 0x45, 0x37, 0x32, 0x3c, 0x37, 0x3d, 0x42, + 0x38, 0x3d, 0x2f, 0x31, 0x39, 0x40, 0x3f, 0x44, 0x3a, 0x41, 0x44, 0x46, + 0x3d, 0x3a, 0x32, 0x3b, 0x34, 0x47, 0x36, 0x4c, 0x47, 0x35, 0x3c, 0x33, + 0x3b, 0x3c, 0x30, 0x43, 0x43, 0x3f, 0x31, 0x40, 0x3a, 0x37, 0x30, 0x46, + 0x39, 0x3b, 0x42, 0x40, 0x2d, 0x3f, 0x3e, 0x6a, 0x50, 0x3b, 0x31, 0x54, + 0x47, 0x3d, 0x48, 0x4e, 0x3b, 0x41, 0x3a, 0x39, 0x49, 0x36, 0x64, 0x4e, + 0x32, 0x39, 0x3d, 0x37, 0x42, 0x2c, 0x5c, 0x43, 0x2a, 0x4b, 0x4b, 0x46, + 0x30, 0x29, 0x52, 0x31, 0x35, 0x44, 0x4a, 0x4b, 0x3d, 0x3b, 0x4e, 0x42, + 0x3d, 0x39, 0x42, 0x52, 0x3f, 0x36, 0x3e, 0x50, 0x3f, 0x32, 0x35, 0x3a, + 0x40, 0x39, 0x35, 0x48, 0x3b, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x2f, 0x36, + 0x38, 0x34, 0x3f, 0x44, 0x32, 0x3f, 0x37, 0x33, 0x33, 0x35, 0x2e, 0x41, + 0x37, 0x3e, 0x38, 0x28, 0x49, 0x30, 0x46, 0x39, 0x3b, 0x30, 0x38, 0x28, + 0x3b, 0x3d, 0x3a, 0x43, 0x3f, 0x34, 0x43, 0x36, 0x39, 0x3c, 0x3e, 0x3e, + 0x39, 0x3b, 0x39, 0x32, 0x3c, 0x36, 0x3e, 0x38, 0x34, 0x3c, 0x3a, 0x2a, + 0x46, 0x3d, 0x40, 0x37, 0x3b, 0x39, 0x3b, 0x34, 0x38, 0x31, 0x43, 0x46, + 0x3b, 0x43, 0x39, 0x2b, 0x38, 0x40, 0x3e, 0x39, 0x35, 0x3d, 0x2c, 0x36, + 0x37, 0x40, 0x36, 0x40, 0x41, 0x38, 0x32, 0x3f, 0x36, 0x46, 0x34, 0x31, + 0x40, 0x3e, 0x3c, 0x4e, 0x42, 0x3d, 0x36, 0x3f, 0x42, 0x3f, 0x33, 0x40, + 0x34, 0x37, 0x3c, 0x3b, 0x31, 0x47, 0x32, 0x3c, 0x34, 0x3d, 0x42, 0x3b, + 0x37, 0x41, 0x3b, 0x64, 0x52, 0x40, 0x36, 0x4e, 0x46, 0x3f, 0x3f, 0x47, + 0x3c, 0x3a, 0x3a, 0x41, 0x4a, 0x32, 0x5e, 0x50, 0x2d, 0x39, 0x3a, 0x38, + 0x3d, 0x2c, 0x5a, 0x3e, 0x2e, 0x47, 0x3e, 0x3e, 0x33, 0x29, 0x4c, 0x35, + 0x30, 0x4d, 0x4d, 0x4d, 0x38, 0x42, 0x51, 0x47, 0x39, 0x3c, 0x43, 0x4b, + 0x42, 0x3f, 0x3a, 0x4b, 0x44, 0x3f, 0x3a, 0x44, 0x3e, 0x37, 0x30, 0x45, + 0x3d, 0x36, 0x34, 0x3f, 0x36, 0x35, 0x37, 0x36, 0x43, 
0x3b, 0x37, 0x3e, + 0x35, 0x3e, 0x32, 0x34, 0x32, 0x38, 0x3c, 0x3a, 0x3a, 0x3c, 0x30, 0x2b, + 0x31, 0x37, 0x30, 0x42, 0x36, 0x37, 0x36, 0x2c, 0x3c, 0x31, 0x41, 0x37, + 0x44, 0x41, 0x3b, 0x37, 0x41, 0x3f, 0x38, 0x3b, 0x3a, 0x3a, 0x3c, 0x2f, + 0x47, 0x41, 0x3e, 0x33, 0x42, 0x3a, 0x32, 0x34, 0x44, 0x40, 0x43, 0x3d, + 0x34, 0x41, 0x38, 0x35, 0x35, 0x3b, 0x45, 0x38, 0x32, 0x37, 0x3c, 0x2e, + 0x39, 0x40, 0x30, 0x3e, 0x42, 0x35, 0x3d, 0x36, 0x3e, 0x3d, 0x39, 0x46, + 0x3f, 0x36, 0x37, 0x49, 0x41, 0x39, 0x3d, 0x3d, 0x33, 0x44, 0x42, 0x50, + 0x3d, 0x3c, 0x3e, 0x3f, 0x42, 0x42, 0x3b, 0x3d, 0x41, 0x31, 0x39, 0x3a, + 0x44, 0x34, 0x38, 0x47, 0x44, 0x38, 0x3b, 0x42, 0x30, 0x42, 0x44, 0x57, + 0x49, 0x3a, 0x39, 0x4f, 0x41, 0x3e, 0x40, 0x43, 0x37, 0x42, 0x3b, 0x48, + 0x50, 0x29, 0x5b, 0x44, 0x2c, 0x40, 0x3f, 0x3c, 0x46, 0x34, 0x5c, 0x41, + 0x2c, 0x48, 0x46, 0x46, 0x35, 0x32, 0x4c, 0x35, 0x2f, 0x3b, 0x48, 0x44, + 0x41, 0x41, 0x49, 0x45, 0x34, 0x37, 0x44, 0x45, 0x43, 0x3b, 0x42, 0x44, + 0x3a, 0x37, 0x48, 0x49, 0x34, 0x39, 0x33, 0x4a, 0x40, 0x3d, 0x33, 0x39, + 0x39, 0x3b, 0x30, 0x31, 0x3d, 0x47, 0x3c, 0x3a, 0x34, 0x3c, 0x3a, 0x2b, + 0x3a, 0x34, 0x41, 0x40, 0x42, 0x36, 0x44, 0x2c, 0x40, 0x47, 0x3b, 0x37, + 0x38, 0x42, 0x44, 0x29, 0x36, 0x3d, 0x3d, 0x36, 0x42, 0x3b, 0x35, 0x36, + 0x43, 0x39, 0x41, 0x3d, 0x45, 0x41, 0x31, 0x32, 0x40, 0x3d, 0x3c, 0x41, + 0x3e, 0x3d, 0x35, 0x34, 0x32, 0x38, 0x36, 0x3f, 0x3b, 0x3d, 0x39, 0x36, + 0x40, 0x3e, 0x3d, 0x3a, 0x3a, 0x3b, 0x3c, 0x32, 0x40, 0x34, 0x3a, 0x36, + 0x42, 0x47, 0x3e, 0x33, 0x3a, 0x44, 0x30, 0x39, 0x40, 0x3a, 0x36, 0x44, + 0x3c, 0x3b, 0x3f, 0x33, 0x3e, 0x3c, 0x35, 0x53, 0x43, 0x3c, 0x3f, 0x43, + 0x3d, 0x44, 0x33, 0x47, 0x42, 0x40, 0x37, 0x3b, 0x43, 0x3f, 0x33, 0x41, + 0x38, 0x42, 0x44, 0x3d, 0x2d, 0x3f, 0x46, 0x49, 0x4e, 0x3f, 0x36, 0x45, + 0x45, 0x39, 0x40, 0x42, 0x39, 0x39, 0x3a, 0x42, 0x45, 0x2c, 0x61, 0x44, + 0x30, 0x45, 0x38, 0x3a, 0x40, 0x37, 0x58, 0x39, 0x31, 0x3e, 0x3a, 0x3e, + 0x37, 0x32, 0x4a, 0x39, 0x2e, 0x47, 0x3e, 0x4e, 0x3f, 
0x3e, 0x48, 0x45, + 0x3f, 0x48, 0x3a, 0x3f, 0x40, 0x36, 0x3a, 0x44, 0x36, 0x3e, 0x3d, 0x41, + 0x45, 0x36, 0x36, 0x4b, 0x3a, 0x3d, 0x45, 0x48, 0x38, 0x45, 0x39, 0x38, + 0x38, 0x3a, 0x42, 0x34, 0x3f, 0x34, 0x39, 0x34, 0x32, 0x3f, 0x3c, 0x3d, + 0x3d, 0x47, 0x3a, 0x2f, 0x3c, 0x3e, 0x3f, 0x39, 0x35, 0x42, 0x3c, 0x2a, + 0x3b, 0x35, 0x42, 0x44, 0x46, 0x39, 0x38, 0x39, 0x43, 0x3a, 0x38, 0x42, + 0x3d, 0x3a, 0x40, 0x35, 0x34, 0x39, 0x3a, 0x38, 0x43, 0x42, 0x42, 0x2d, + 0x31, 0x3b, 0x33, 0x40, 0x3b, 0x47, 0x35, 0x30, 0x3a, 0x3c, 0x3b, 0x47, + 0x3a, 0x3c, 0x38, 0x35, 0x3c, 0x35, 0x3e, 0x3e, 0x39, 0x3d, 0x39, 0x40, + 0x37, 0x33, 0x49, 0x38, 0x3c, 0x43, 0x34, 0x40, 0x39, 0x42, 0x3c, 0x3b, + 0x3e, 0x45, 0x3e, 0x51, 0x3d, 0x3f, 0x3b, 0x34, 0x37, 0x3c, 0x40, 0x47, + 0x3c, 0x41, 0x3f, 0x41, 0x37, 0x3e, 0x36, 0x3c, 0x42, 0x40, 0x3f, 0x3a, + 0x3b, 0x42, 0x44, 0x4b, 0x4b, 0x37, 0x41, 0x4d, 0x41, 0x45, 0x40, 0x41, + 0x40, 0x38, 0x37, 0x40, 0x42, 0x2c, 0x57, 0x43, 0x2d, 0x49, 0x3a, 0x3e, + 0x37, 0x2f, 0x52, 0x37, 0x31, 0x42, 0x3b, 0x3f, 0x39, 0x38, 0x48, 0x3c, + 0x37, 0x3d, 0x3a, 0x39, 0x3a, 0x45, 0x4b, 0x49, 0x3e, 0x44, 0x48, 0x49, + 0x3d, 0x39, 0x3c, 0x41, 0x41, 0x38, 0x45, 0x38, 0x33, 0x3d, 0x37, 0x47, + 0x34, 0x3f, 0x3b, 0x3d, 0x39, 0x34, 0x30, 0x39, 0x44, 0x36, 0x34, 0x3c, + 0x37, 0x38, 0x45, 0x34, 0x40, 0x33, 0x41, 0x3a, 0x3e, 0x3c, 0x3b, 0x3a, + 0x40, 0x3f, 0x3b, 0x3d, 0x3b, 0x46, 0x41, 0x2a, 0x3a, 0x3c, 0x42, 0x46, + 0x33, 0x3f, 0x2d, 0x3a, 0x45, 0x45, 0x38, 0x3b, 0x44, 0x34, 0x35, 0x3f, + 0x34, 0x43, 0x38, 0x3e, 0x41, 0x3b, 0x42, 0x38, 0x3d, 0x3f, 0x38, 0x45, + 0x3b, 0x35, 0x39, 0x3c, 0x43, 0x43, 0x38, 0x34, 0x44, 0x43, 0x2e, 0x39, + 0x39, 0x40, 0x39, 0x41, 0x41, 0x34, 0x3e, 0x44, 0x3d, 0x43, 0x3a, 0x3a, + 0x3b, 0x3b, 0x36, 0x45, 0x3c, 0x43, 0x3d, 0x48, 0x36, 0x36, 0x39, 0x55, + 0x35, 0x40, 0x3e, 0x49, 0x40, 0x3a, 0x3d, 0x3d, 0x34, 0x47, 0x40, 0x41, + 0x40, 0x47, 0x39, 0x3e, 0x3b, 0x38, 0x3c, 0x3a, 0x35, 0x3e, 0x41, 0x4a, + 0x4b, 0x3f, 0x36, 0x3d, 0x40, 0x3c, 0x39, 0x32, 0x33, 
0x36, 0x30, 0x42, + 0x42, 0x36, 0x54, 0x48, 0x2e, 0x4c, 0x34, 0x3c, 0x39, 0x36, 0x4e, 0x37, + 0x2f, 0x3e, 0x30, 0x3d, 0x36, 0x3b, 0x45, 0x36, 0x37, 0x3e, 0x41, 0x4b, + 0x3b, 0x36, 0x45, 0x3b, 0x38, 0x45, 0x3e, 0x43, 0x48, 0x46, 0x44, 0x44, + 0x3e, 0x3b, 0x37, 0x3b, 0x3a, 0x3f, 0x3d, 0x44, 0x39, 0x38, 0x45, 0x43, + 0x3d, 0x35, 0x39, 0x2c, 0x44, 0x41, 0x36, 0x40, 0x3d, 0x39, 0x3d, 0x2f, + 0x3d, 0x39, 0x42, 0x3d, 0x36, 0x46, 0x43, 0x2c, 0x41, 0x3a, 0x30, 0x45, + 0x3f, 0x41, 0x35, 0x2b, 0x3b, 0x38, 0x3a, 0x44, 0x32, 0x32, 0x39, 0x3c, + 0x3a, 0x3a, 0x3c, 0x3a, 0x35, 0x40, 0x3b, 0x31, 0x36, 0x33, 0x35, 0x34, + 0x3c, 0x3b, 0x3d, 0x36, 0x48, 0x3b, 0x3f, 0x42, 0x3e, 0x33, 0x2f, 0x3a, + 0x49, 0x41, 0x39, 0x3e, 0x3c, 0x44, 0x3c, 0x39, 0x33, 0x39, 0x36, 0x35, + 0x3d, 0x42, 0x34, 0x3e, 0x38, 0x45, 0x40, 0x45, 0x3d, 0x48, 0x42, 0x4a, + 0x3f, 0x45, 0x38, 0x42, 0x44, 0x40, 0x34, 0x49, 0x44, 0x3d, 0x3a, 0x39, + 0x3e, 0x3a, 0x42, 0x3e, 0x48, 0x42, 0x3e, 0x3a, 0x3f, 0x3f, 0x32, 0x3b, + 0x38, 0x41, 0x3c, 0x39, 0x33, 0x45, 0x44, 0x3c, 0x48, 0x41, 0x41, 0x3d, + 0x3a, 0x3c, 0x37, 0x33, 0x41, 0x3f, 0x38, 0x3a, 0x3f, 0x37, 0x51, 0x3c, + 0x37, 0x3a, 0x43, 0x37, 0x40, 0x31, 0x4f, 0x34, 0x3b, 0x44, 0x45, 0x39, + 0x40, 0x33, 0x49, 0x33, 0x3e, 0x35, 0x44, 0x3d, 0x3b, 0x3f, 0x43, 0x41, + 0x43, 0x43, 0x48, 0x44, 0x46, 0x3b, 0x43, 0x3f, 0x3c, 0x3f, 0x3e, 0x3d, + 0x3b, 0x41, 0x3c, 0x43, 0x30, 0x34, 0x39, 0x33, 0x3f, 0x38, 0x36, 0x2e, + 0x33, 0x3f, 0x3c, 0x40, 0x3d, 0x3b, 0x3b, 0x31, 0x36, 0x41, 0x3b, 0x38, + 0x46, 0x36, 0x34, 0x31, 0x42, 0x44, 0x33, 0x35, 0x3f, 0x36, 0x3c, 0x30, + 0x3f, 0x31, 0x39, 0x3e, 0x3f, 0x47, 0x3e, 0x34, 0x36, 0x36, 0x34, 0x39, + 0x37, 0x46, 0x40, 0x33, 0x3b, 0x3a, 0x3f, 0x41, 0x37, 0x44, 0x3a, 0x3f, + 0x34, 0x45, 0x37, 0x33, 0x3f, 0x47, 0x41, 0x36, 0x39, 0x3e, 0x40, 0x38, + 0x41, 0x3d, 0x3d, 0x36, 0x40, 0x3a, 0x3b, 0x3b, 0x41, 0x3b, 0x3a, 0x3f, + 0x3f, 0x3b, 0x35, 0x42, 0x46, 0x3a, 0x30, 0x45, 0x40, 0x37, 0x39, 0x39, + 0x3d, 0x38, 0x3f, 0x45, 0x3f, 0x31, 0x32, 0x3b, 0x35, 
0x3e, 0x3b, 0x38, + 0x3b, 0x44, 0x37, 0x39, 0x37, 0x42, 0x3f, 0x44, 0x38, 0x36, 0x37, 0x44, + 0x45, 0x46, 0x41, 0x3b, 0x46, 0x42, 0x43, 0x43, 0x3a, 0x4b, 0x37, 0x35, + 0x3b, 0x40, 0x32, 0x38, 0x41, 0x38, 0x4f, 0x3e, 0x36, 0x3f, 0x47, 0x3b, + 0x47, 0x3b, 0x4a, 0x2e, 0x3d, 0x45, 0x3b, 0x46, 0x3e, 0x38, 0x43, 0x38, + 0x41, 0x48, 0x3a, 0x39, 0x40, 0x45, 0x3b, 0x43, 0x40, 0x3e, 0x43, 0x41, + 0x41, 0x3e, 0x39, 0x3f, 0x35, 0x42, 0x33, 0x3f, 0x3d, 0x32, 0x45, 0x3c, + 0x41, 0x31, 0x45, 0x38, 0x43, 0x45, 0x41, 0x35, 0x35, 0x40, 0x44, 0x36, + 0x3a, 0x3b, 0x3c, 0x2c, 0x3e, 0x41, 0x33, 0x3d, 0x46, 0x34, 0x3b, 0x30, + 0x30, 0x42, 0x43, 0x3d, 0x3d, 0x3d, 0x43, 0x31, 0x3f, 0x40, 0x3a, 0x3f, + 0x48, 0x3e, 0x3b, 0x39, 0x44, 0x43, 0x3b, 0x3a, 0x42, 0x38, 0x38, 0x3b, + 0x3f, 0x44, 0x37, 0x3e, 0x45, 0x40, 0x41, 0x3b, 0x3c, 0x3a, 0x38, 0x37, + 0x3b, 0x33, 0x3f, 0x35, 0x43, 0x3d, 0x33, 0x41, 0x3b, 0x46, 0x39, 0x32, + 0x39, 0x3f, 0x3b, 0x39, 0x47, 0x3c, 0x3f, 0x39, 0x34, 0x3d, 0x3c, 0x46, + 0x3f, 0x3e, 0x3e, 0x44, 0x34, 0x40, 0x3f, 0x39, 0x3c, 0x38, 0x36, 0x45, + 0x42, 0x46, 0x3b, 0x44, 0x3a, 0x3d, 0x3b, 0x42, 0x3b, 0x3b, 0x3c, 0x45, + 0x42, 0x3d, 0x36, 0x37, 0x3d, 0x43, 0x3f, 0x48, 0xa6, 0xfb, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xb3, 0x00, 0x00, 0x00, + 0x39, 0xff, 0xff, 0xff, 0xe5, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, + 0x68, 0xfb, 0xff, 0xff, 0xbc, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, - 0x58, 0x01, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00, - 0x38, 0x02, 0x00, 0x00, 0x9c, 0x02, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, - 0x14, 0x03, 0x00, 0x00, 0xfe, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, - 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x70, 0x02, 0x00, 0x00, + 0x70, 0x03, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xf0, 0x01, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00, + 0xa4, 0x02, 0x00, 0x00, 0xba, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x24, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, + 0x6c, 0x73, 0x5f, 0x73, 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x3c, 0xfd, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0xfd, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x03, 0x1c, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x61, 0x64, 0x64, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xfd, 0xff, 0xff, + 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x97, 0xf5, 0x3f, + 0x01, 0x00, 0x00, 0x00, 0x87, 0x35, 0xa0, 0x43, 0x01, 0x00, 0x00, 0x00, + 0xd6, 0xd7, 0x28, 0xc3, 0x92, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x1c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 
0x00, 0x00, - 0xcc, 0xfc, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x14, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x17, 0xbf, 0xd2, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x58, 0xec, 0xd1, 0x43, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0xfd, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x6e, 0x76, - 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x34, 0xff, 0xff, 0xff, + 0x05, 0x80, 0xbf, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x85, 0xc0, 0xbe, 0x43, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x03, 0x3c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, + 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, + 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, + 0x73, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0xa4, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, 0xc2, 0xfd, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 
0x68, - 0x61, 0x70, 0x65, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x94, 0xfd, 0xff, 0xff, - 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xae, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x17, 0xac, 0x6e, 0x3a, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x4e, 0x97, 0x3d, 0x01, 0x00, 0x00, 0x00, 0xaf, 0x27, 0x21, 0xbe, + 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x20, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68, 0x61, 0x70, 0x65, 0x5f, + 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x1c, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0xfe, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d, - 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x0c, 0x00, 0x0c, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x42, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x43, 0x6f, 0x6e, 0x76, 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xfc, 0xfe, 0xff, 
0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xc5, 0x01, 0x2a, 0x3b, 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, - 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, - 0x71, 0x75, 0x61, 0x6e, 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, - 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, - 0x78, 0x56, 0x61, 0x72, 0x73, 0x00, 0x00, 0x00, 0x84, 0xfe, 0xff, 0xff, - 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xab, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, - 0x01, 0x00, 0x00, 0x00, 0x6e, 0x88, 0xae, 0x3d, 0x01, 0x00, 0x00, 0x00, - 0xd4, 0x97, 0x30, 0xbe, 0x26, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, - 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f, - 0x31, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2f, 0xad, 0x18, 0x40, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x38, 0xa2, 0x43, 0x01, 0x00, 0x00, 0x00, 0x02, 0xf1, 0x8d, 0xc3, - 0x8e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73, 
- 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff, - 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x17, 0xac, 0xee, 0x39, 0x5a, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x54, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, + 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x5f, 0x31, 0x2f, + 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, + 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, 0x73, 0x2f, + 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00, + 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x9d, 0xaf, 0xd0, 0x3a, 0x01, 0x00, 0x00, 0x00, + 0xe7, 0x29, 0x9e, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x5b, 0x91, 0xc3, 0xbd, + 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, 0x07, 0x00, 0x0c, 0x00, + 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d, + 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, - 
0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, - 0x74, 0x5f, 0x31, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, - 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, - 0x61, 0x72, 0x73, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, - 0x65, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, - 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x31, 0x83, 0xce, 0x3a, 0x01, 0x00, 0x00, 0x00, - 0x4d, 0x97, 0x92, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x84, 0x75, 0xec, 0xbd, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x62, 0x1b, 0x1c, 0x3b, 0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, - 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, - 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc new file mode 100644 index 0000000000..3ad29e53c8 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc @@ -0,0 +1,158 @@ +/* Copyright 2018 The TensorFlow 
Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h" + +/* File automatically created by + * tensorflow/examples/speech_commands/wav_to_features.py \ + * --sample_rate=16000 \ + * --clip_duration_ms=1000 \ + * --window_size_ms=30 \ + * --window_stride_ms=20 \ + * --feature_bin_count=40 \ + * --quantize \ + * --preprocess="average" \ + * --input_wav="speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav" \ + * --output_c_file="yes_features_data.cc" \ + */ + +const int g_yes_f2e59fea_nohash_1_width = 43; +const int g_yes_f2e59fea_nohash_1_height = 49; +const unsigned char g_yes_f2e59fea_nohash_1_data[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 19, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 3, 3, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 89, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 13, + 1, 6, 23, 20, 6, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 177, 42, 1, + 1, 0, 0, 0, 0, 2, 3, 119, 51, 5, 139, 92, 58, 58, 15, 2, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 13, 165, 176, 3, 1, 1, 0, 0, 1, 1, 32, 214, + 26, 19, 113, 103, 28, 22, 27, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 55, 128, + 27, 1, 1, 0, 1, 4, 2, 52, 93, 10, 28, 156, 10, 21, 21, 3, 3, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 14, 99, 32, 65, 7, 1, 2, 2, 6, 13, 121, + 36, 15, 11, 112, 125, 14, 5, 13, 4, 4, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, + 32, 5, 1, 0, 0, 0, 1, 0, 7, 5, 1, 1, 3, 3, 0, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 13, 13, 5, 1, 0, 0, 0, 0, 0, 3, + 4, 1, 0, 1, 2, 3, 1, 1, 1, 4, 8, 1, 2, 1, 3, 1, 1, + 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 8, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 2, 0, 2, + 1, 0, 2, 0, 2, 2, 3, 1, 1, 0, 1, 1, 4, 5, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 2, 1, 0, 1, 3, 1, + 1, 3, 1, 1, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 1, 1, 0, 1, 2, 6, 2, 4, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 1, 2, 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 2, 3, 5, 2, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; diff --git 
a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h new file mode 100644 index 0000000000..33ac230862 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h @@ -0,0 +1,23 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_ + +extern const int g_yes_f2e59fea_nohash_1_width; +extern const int g_yes_f2e59fea_nohash_1_height; +extern const unsigned char g_yes_f2e59fea_nohash_1_data[]; + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h index 104509c9dc..3b6554dea6 100644 --- a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h +++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h @@ -135,4 +135,22 @@ extern tflite::ErrorReporter* reporter; } \ } while (false) +#define TF_LITE_MICRO_EXPECT_GT(x, y) \ + do { \ + if ((x) <= (y)) { \ + micro_test::reporter->Report(#x " > " #y " 
failed at %s:%d", __FILE__, \ + __LINE__); \ + micro_test::did_test_fail = true; \ + } \ + } while (false) + +#define TF_LITE_MICRO_EXPECT_LT(x, y) \ + do { \ + if ((x) >= (y)) { \ + micro_test::reporter->Report(#x " < " #y " failed at %s:%d", __FILE__, \ + __LINE__); \ + micro_test::did_test_fail = true; \ + } \ + } while (false) + #endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile index 880bb4763c..3f749e53ef 100644 --- a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile +++ b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile @@ -55,7 +55,9 @@ MICROLITE_LIB_NAME := libtensorflow-microlite.a # Test binary for the microcontroller speech model. MICRO_SPEECH_TEST_SRCS := \ tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \ -tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc +tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc \ +tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc \ +tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc MICROLITE_TEST_SRCS := \ $(wildcard tensorflow/contrib/lite/experimental/micro/*test.cc) \ -- GitLab From b55ff8747914d41f16fcfe02a7346c574dff1bb7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 11 Oct 2018 06:32:38 -0700 Subject: [PATCH 269/411] Internal change. 
PiperOrigin-RevId: 216686849 --- .../internal/optimized/depthwiseconv_float.h | 27 ------------------- .../internal/optimized/depthwiseconv_uint8.h | 27 ------------------- 2 files changed, 54 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h index d8dd7bba89..bcadfb2f8c 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h @@ -841,33 +841,6 @@ inline void FloatDepthwiseConvAccumRowGeneric( int filter_width, const float* filter_data, int out_x_buffer_start, int out_x_buffer_end, int output_depth, float* acc_buffer) { gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)"); -#ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK - LOG(FATAL) - << "\n\n" - << "*****************************************************************\n" - << "* This tfmini inference code was about to use the slow generic\n" - << "* fallback implementation for a DepthwiseConv op, and we want you\n" - << "* to be aware of that so that you will know why you get terrible\n" - << "* performance.\n" - << "*\n" - << "* If you would like to carry on with the slow code, compile\n" - << "* with this preprocessor token defined:\n" - << "* ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK.\n" - << "*\n" - << "* The right thing to do, if you care about performance, is to add\n" - << "* a new DepthwiseConv kernel to tfmini to cover your case.\n" - << "* The relevant parameters defining your case are:\n" - << "* stride = " << stride << "\n" - << "* input_depth = " << input_depth << "\n" - << "* depth_multiplier = " << depth_multiplier << "\n" - << "* dilation_factor = " << dilation_factor << "\n" - << "*\n" - << "* Please do not hesitate to contact benoitjacob@ with this\n" - << "* information.\n" - << 
"*****************************************************************\n"; -#endif // ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#endif // TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK const float* filter_base_ptr = filter_data; for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int out_x_loop_start = std::max( diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index 803eff292a..eff9cab477 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1546,33 +1546,6 @@ inline void QuantizedDepthwiseConvAccumRowGeneric( int16 filter_offset, int out_x_buffer_start, int out_x_buffer_end, int output_depth, int32* acc_buffer) { gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)"); -#ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK - LOG(FATAL) - << "\n\n" - << "*****************************************************************\n" - << "* This tfmini inference code was about to use the slow generic\n" - << "* fallback implementation for a DepthwiseConv op, and we want you\n" - << "* to be aware of that so that you will know why you get terrible\n" - << "* performance.\n" - << "*\n" - << "* If you would like to carry on with the slow code, compile\n" - << "* with this preprocessor token defined:\n" - << "* ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK.\n" - << "*\n" - << "* The right thing to do, if you care about performance, is to add\n" - << "* a new DepthwiseConv kernel to tfmini to cover your case.\n" - << "* The relevant parameters defining your case are:\n" - << "* stride = " << stride << "\n" - << "* input_depth = " << input_depth << "\n" - << "* depth_multiplier = " << depth_multiplier << "\n" - << "* dilation_factor = " << dilation_factor << "\n" - << 
"*\n" - << "* Please do not hesitate to contact benoitjacob@ with this\n" - << "* information.\n" - << "*****************************************************************\n"; -#endif // ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#endif // TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK const uint8* filter_base_ptr = filter_data; for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int out_x_loop_start = std::max( -- GitLab From b2ab2da16f22007e0f4d61d8806ebac6d5d0edd5 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 11 Oct 2018 06:49:37 -0700 Subject: [PATCH 270/411] Support arbitrary many values in KeyValueSort on GPU backend. PiperOrigin-RevId: 216688700 --- .../xla/service/gpu/ir_emitter_unnested.cc | 72 +++++++++++-------- tensorflow/compiler/xla/service/llvm_ir/BUILD | 1 - .../compiler/xla/service/llvm_ir/sort_util.cc | 20 +++--- .../compiler/xla/service/llvm_ir/sort_util.h | 5 +- 4 files changed, 54 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index d8ae5b46fe..fd624fda08 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -34,6 +34,7 @@ limitations under the License. #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor.h" @@ -2192,34 +2193,34 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { std::vector> thunks; - auto keys = sort->operand(0); - auto values = sort->operand_count() > 1 ? 
sort->operand(1) : nullptr; - ShapeIndex keys_shape_index({}); - ShapeIndex values_shape_index({}); - if (values != nullptr) { - keys_shape_index = ShapeIndex({0}); - values_shape_index = ShapeIndex({1}); - } - auto keys_destination = GetAllocationSlice(*sort, keys_shape_index); - auto values_destination = GetAllocationSlice(*sort, values_shape_index); - - if (keys_destination != GetAllocationSlice(*keys)) { - thunks.push_back(absl::make_unique( - /*source_address=*/GetAllocationSlice(*keys), - /*destination_buffer=*/keys_destination, - /*mem_size=*/ShapeUtil::ByteSizeOf(keys->shape()), nullptr)); - } - if (values != nullptr && values_destination != GetAllocationSlice(*values)) { - // TODO(b/26783907): Figure out why we never seem to share buffers for - // key/value sort. - thunks.push_back(absl::make_unique( - /*source_address=*/GetAllocationSlice(*values), - /*destination_buffer=*/values_destination, - /*mem_size=*/ShapeUtil::ByteSizeOf(values->shape()), nullptr)); + Shape keys_shape = sort->operand(0)->shape(); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + // We assume that the layout of all involved operands and outputs is the + // same. + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape, + sort->operand(i)->shape())); + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( + keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index))); + + // If possible, we share buffers. If that is not possible, we need to copy + // the values, because the emitter does the sorting in-place. + auto destination_buffer = GetAllocationSlice(*sort, shape_index); + auto source_address = GetAllocationSlice(*sort->operand(i)); + if (destination_buffer != source_address) { + // TODO(b/26783907): Figure out why we never seem to share buffers for + // key/value sort. 
+ thunks.push_back(absl::make_unique( + /*source_address=*/source_address, + /*destination_buffer=*/destination_buffer, + /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()), + nullptr)); + } } int64 dimension_to_sort = sort->dimensions(0); - int64 dimension_to_sort_bound = keys->shape().dimensions(dimension_to_sort); + int64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort); int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound); auto index_type = b_.getInt64Ty(); @@ -2243,7 +2244,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { thunks.push_back( BuildKernelThunk(sort, /*implements_whole_instruction=*/false)); LaunchDimensions launch_dimensions = CalculateLaunchDimensions( - keys->shape(), ir_emitter_context_->device_description()); + keys_shape, ir_emitter_context_->device_description()); UpdateLaunchDimensions(launch_dimensions, thunks.back().get(), ir_emitter_context_->llvm_module()); @@ -2254,12 +2255,21 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { xor_mask = llvm::ConstantInt::get(index_type, 1LL << mask); } + IrArray keys_array; + std::vector values_arrays; + values_arrays.reserve(sort->operand_count() - 1); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + if (i == 0) { + keys_array = GetIrArray(*sort, *sort, shape_index); + } else { + values_arrays.push_back(GetIrArray(*sort, *sort, shape_index)); + } + } TF_RETURN_IF_ERROR(llvm_ir::EmitSortInPlace( - dimension_to_sort, GetIrArray(*sort, *sort, keys_shape_index), - values != nullptr ? 
absl::make_optional( - GetIrArray(*sort, *sort, values_shape_index)) - : absl::nullopt, - IrName(sort), xor_mask, &b_, &launch_dimensions)); + dimension_to_sort, keys_array, values_arrays, IrName(sort), xor_mask, + &b_, &launch_dimensions)); } } diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 6223a34b12..5f7ad81d82 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -204,7 +204,6 @@ cc_library( "//tensorflow/compiler/xla/service/gpu:partition_assignment", "//tensorflow/core:lib", "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", "@llvm//:core", "@llvm//:support", ], diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc index 944c79580c..05ba4a40da 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc @@ -15,9 +15,10 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_ir/sort_util.h" +#include + // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc" #include "absl/strings/string_view.h" -#include "absl/types/optional.h" #include "llvm/ADT/APInt.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -43,7 +44,7 @@ namespace { void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index, const IrArray::Index& compare_keys_index, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, llvm::IRBuilder<>* b) { // if (is_smaller_index && // compare_keys[dimension_to_sort] < dimension_to_sort_bound) @@ -100,19 +101,18 @@ void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index, // Swap key1 with key2. 
keys_array.EmitWriteArrayElement(keys_index, key2, b); keys_array.EmitWriteArrayElement(compare_keys_index, key1, b); - if (values_array.has_value()) { + for (const auto& values_array : values_arrays) { // Also swap the values. - auto value1 = values_array.value().EmitReadArrayElement(keys_index, b); - auto value2 = - values_array.value().EmitReadArrayElement(compare_keys_index, b); - values_array.value().EmitWriteArrayElement(keys_index, value2, b); - values_array.value().EmitWriteArrayElement(compare_keys_index, value1, b); + auto value1 = values_array.EmitReadArrayElement(keys_index, b); + auto value2 = values_array.EmitReadArrayElement(compare_keys_index, b); + values_array.EmitWriteArrayElement(keys_index, value2, b); + values_array.EmitWriteArrayElement(compare_keys_index, value1, b); } } } // namespace Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, absl::string_view name, llvm::Value* xor_mask, llvm::IRBuilder<>* b, const gpu::LaunchDimensions* launch_dimensions) { @@ -162,7 +162,7 @@ Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, compare_keys_index[dimension_to_sort] = b->CreateXor(compare_index[0], xor_mask); EmitCompareLoop(dimension_to_sort, keys_index, compare_keys_index, - keys_array, values_array, b); + keys_array, values_arrays, b); return Status::OK(); }; if (launch_dimensions != nullptr) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h index 527ed10374..2f3bcda230 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h @@ -16,8 +16,9 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_ +#include + #include "absl/strings/string_view.h" -#include "absl/types/optional.h" #include "llvm/IR/Value.h" #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" @@ -31,7 +32,7 @@ namespace llvm_ir { // implements the inner loop of BitonicSort. If 'launch_dimensions' is nullptr, // the inner compare loop will not be parallelized. Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, absl::string_view name, llvm::Value* xor_mask, llvm::IRBuilder<>* b, const gpu::LaunchDimensions* launch_dimensions); -- GitLab From 5eb3d92fc5d7a0641ad5d1ad2b54870b6e5b5e58 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 11 Oct 2018 07:15:05 -0700 Subject: [PATCH 271/411] Disallow PRED as element type for arithmetic operations in HloVerifier PiperOrigin-RevId: 216691593 --- tensorflow/compiler/xla/service/shape_inference.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index aa49f98bcf..25afc23e5b 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -919,6 +919,9 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, switch (opcode) { case HloOpcode::kMaximum: case HloOpcode::kMinimum: + return InferElementwiseBinaryOpShape(opcode, lhs, rhs, + broadcast_dimensions); + case HloOpcode::kSubtract: case HloOpcode::kAdd: case HloOpcode::kAtan2: @@ -929,6 +932,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: + if (lhs.element_type() == 
PRED || rhs.element_type() == PRED) { + return InvalidArgument( + "Expected element type in shape to be arithmetic type for " + "operation %s; got PRED.", + HloOpcodeString(opcode)); + } return InferElementwiseBinaryOpShape(opcode, lhs, rhs, broadcast_dimensions); -- GitLab From 4c080129c62d625c3f57c7d9ee6d8dab8e91ca1e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 11 Oct 2018 08:06:10 -0700 Subject: [PATCH 272/411] Automated rollback of commit e5537748753491f803fbddebdcb1cdb710631db9 PiperOrigin-RevId: 216697425 --- tensorflow/python/ops/image_ops_impl.py | 14 +++++--------- tensorflow/python/ops/image_ops_test.py | 10 ---------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index a5c800ed9f..1c75aab578 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1184,8 +1184,7 @@ def per_image_standardization(image): away from zero to protect against division by 0 when handling uniform images. Args: - image: 4-D Tensor of shape `[batch, height, width, channels]` or - 3-D Tensor of shape `[height, width, channels]`. + image: 3-D tensor of shape `[height, width, channels]`. Returns: The standardized image with same shape as `image`. 
@@ -1195,17 +1194,14 @@ def per_image_standardization(image): """ with ops.name_scope(None, 'per_image_standardization', [image]) as scope: image = ops.convert_to_tensor(image, name='image') - image = _AssertAtLeast3DImage(image) - if image.get_shape().ndims != 3 and image.get_shape().ndims != 4: - raise ValueError('`image` must have either 3 or 4 dimensions.') - num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-1:-4:-1]) + image = _Assert3DImage(image) + num_pixels = math_ops.reduce_prod(array_ops.shape(image)) image = math_ops.cast(image, dtype=dtypes.float32) - image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) + image_mean = math_ops.reduce_mean(image) variance = ( - math_ops.reduce_mean( - math_ops.square(image), axis=[-1, -2, -3], keepdims=True) - + math_ops.reduce_mean(math_ops.square(image)) - math_ops.square(image_mean)) variance = gen_nn_ops.relu(variance) stddev = math_ops.sqrt(variance) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 9ed3b4ff5d..ff86df6346 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -1491,16 +1491,6 @@ class PerImageWhiteningTest(test_util.TensorFlowTestCase): whiten_np = whiten.eval() self.assertFalse(np.any(np.isnan(whiten_np))) - def testBatchWhitening(self): - imgs_np = np.random.uniform(0., 255., [4, 24, 24, 3]) - whiten_np = [self._NumpyPerImageWhitening(img) for img in imgs_np] - with self.test_session(use_gpu=True): - imgs = constant_op.constant(imgs_np) - whiten = image_ops.per_image_standardization(imgs) - whiten_tf = whiten.eval() - for w_tf, w_np in zip(whiten_tf, whiten_np): - self.assertAllClose(w_tf, w_np, atol=1e-4) - class CropToBoundingBoxTest(test_util.TensorFlowTestCase): -- GitLab From 085c3e45863d57b10777d9e4903c36a1326b256a Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Thu, 11 Oct 2018 08:09:55 -0700 Subject: [PATCH 273/411] Update downloadable clang to r343880 
PiperOrigin-RevId: 216697820 --- third_party/clang_toolchain/download_clang.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/third_party/clang_toolchain/download_clang.bzl b/third_party/clang_toolchain/download_clang.bzl index e782739661..5fa459caf1 100644 --- a/third_party/clang_toolchain/download_clang.bzl +++ b/third_party/clang_toolchain/download_clang.bzl @@ -39,15 +39,15 @@ def download_clang(repo_ctx, out_folder): # Latest CLANG_REVISION and CLANG_SUB_REVISION of the Chromiums's release # can be found in https://chromium.googlesource.com/chromium/src/tools/clang/+/master/scripts/update.py - CLANG_REVISION = "340427" + CLANG_REVISION = "343880" CLANG_SUB_REVISION = 1 package_version = "%s-%s" % (CLANG_REVISION, CLANG_SUB_REVISION) checksums = { - "Linux_x64": "8a8f21fb624fc7be7e91e439a13114847185375bb932db51ba590174ecaf764b", - "Mac": "ba894536b7c8d37103a5ddba784f268d55e65bb2ea1200a2cf9f2ef1590eaacd", - "Win": "c3f5bd977266dfd011411c94a13e00974b643b70fb0225a5fb030f7f703fa474", + "Linux_x64": "3530f53516fd08799e2754601e53a19531e1db5bc73c9ad8d2d1d8efdd9c9c9b", + "Mac": "8761b47869089be216324af8c5a93cba2d539a1d252c9c8cad8f2cd6da21f9f4", + "Win": "06eb08aa0b1ff7ea65db375a7dc7151cde7c89a44044fb63e5b73ea2f96c6e65", } platform_folder = _get_platform_folder(repo_ctx.os.name) -- GitLab From 0b1ce4d6f5912c7d749e20207402ee871598b21a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 11 Oct 2018 09:05:09 -0700 Subject: [PATCH 274/411] Automated rollback of commit 9b558126e31d25ec4e82cb4f50033d6eca44349a PiperOrigin-RevId: 216705354 --- .../grappler/optimizers/graph_optimizer.h | 21 ------ .../grappler/optimizers/meta_optimizer.cc | 70 +------------------ .../core/grappler/optimizers/meta_optimizer.h | 15 +--- .../optimizers/meta_optimizer_test.cc | 62 ---------------- .../core/protobuf/rewriter_config.proto | 4 -- 5 files changed, 5 insertions(+), 167 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer.h b/tensorflow/core/grappler/optimizers/graph_optimizer.h index bd6bf9f860..765dd13263 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer.h +++ b/tensorflow/core/grappler/optimizers/graph_optimizer.h @@ -16,11 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_ #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_ -#include #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/thread_annotations.h" namespace tensorflow { namespace grappler { @@ -32,7 +29,6 @@ struct GrapplerItem; // optimization of a GrapplerItem for running on a cluster. class GraphOptimizer { public: - GraphOptimizer() : is_cancelled_(false) {} virtual ~GraphOptimizer() {} virtual string name() const = 0; @@ -49,25 +45,8 @@ class GraphOptimizer { // call to Optimize) performed. Lower "result" scores are better. virtual void Feedback(Cluster* cluster, const GrapplerItem& item, const GraphDef& optimized_graph, double result) = 0; - - // Best effort cancellation. Sets is_cancelled to true and requests that the - // optimizer returns as soon as possible from active calls to Optimize() or - // FeedBack(). 
- void Cancel() { is_cancelled_ = true; } - - bool is_cancelled() const { return is_cancelled_; } - - private: - std::atomic is_cancelled_; }; -#define GRAPPLER_RETURN_IF_CANCELLED() \ - do { \ - if (is_cancelled()) { \ - return errors::DeadlineExceeded(this->name(), " was cancelled."); \ - } \ - } while (0) - } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index b8f4599f56..c3d70a1fdf 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -14,9 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/optimizers/meta_optimizer.h" - -#include - #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/versions.pb.h" @@ -40,11 +37,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/utils/functions.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" -#include "tensorflow/core/platform/cpu_info.h" -#include "tensorflow/core/platform/notification.h" -#include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/util/ptr_util.h" namespace tensorflow { @@ -121,23 +114,6 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( #undef MK_OPT -MetaOptimizer::MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) - : cpu_device_(cpu_device), cfg_(cfg) { - DCHECK(cpu_device_ == nullptr || - cpu_device_->attributes().device_type() == "CPU"); - // TODO(rmlarsen): Increase kNumThreads to, say, port::NumSchedulableCPUs() - // if we want to the threadpool for parallelizing Grappler - const int kNumThreads = 1; - thread_pool_ = absl::make_unique( - Env::Default(), "MetaOptimizerThreadPool", kNumThreads); -} - -MetaOptimizer::~MetaOptimizer() { - // The ThreadPool destructor waits for threads to finish, so we don't - // pull the rug out from under them. - thread_pool_.reset(); -} - Status MetaOptimizer::InitializeOptimizers( std::vector>* optimizers) const { if (cfg_.disable_meta_optimizer()) { @@ -333,7 +309,6 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, VLOG(4) << "Starting optimization iteration " << iteration; for (const auto& optimizer : optimizers) { - GRAPPLER_RETURN_IF_CANCELLED(); // Some optimizers can run only once. if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue; // Some must run only on the last iteration. @@ -392,7 +367,6 @@ Status MetaOptimizer::RunOptimizer( // resets optimized_graph to an empty graph. optimized_graph->Swap(&optimized_item->graph); *optimized_graph = GraphDef(); - // TODO(rmlarsen): Add timeout for individual optimizers. 
Status status = optimizer->Optimize(cluster, *optimized_item, optimized_graph); uint64 end_us = Env::Default()->NowMicros(); @@ -414,15 +388,14 @@ Status MetaOptimizer::RunOptimizer( return status; } -Status MetaOptimizer::OptimizeMainGraphAndFunctionLibrary( - Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { +Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { VLOG(1) << "Starting optimization for grappler item: " << item.id; optimization_results_.clear(); // 1. Optimize main graph TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph)); VLOG(1) << "Optimized main graph."; - GRAPPLER_RETURN_IF_CANCELLED(); // Skip optimizing functions if this is a TPU graph. Currently, Grappler // passes do not handle TPU functions correctly in a variety of ways (Note @@ -458,8 +431,6 @@ Status MetaOptimizer::OptimizeMainGraphAndFunctionLibrary( optimize_function_library = false; for (const FunctionDef& func : optimized_graph->library().function()) { - GRAPPLER_RETURN_IF_CANCELLED(); - const string& func_name = func.signature().name(); // Skip already optimized functions. @@ -534,43 +505,6 @@ void MetaOptimizer::PrintResult() { } } -Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph) { - const int64 kFiveMinutesInUsec = 5 * 60 * 1000 * 1000; - const int64 timeout_usec = (cfg_.meta_optimizer_timeout_ms() == 0 - ? 
kFiveMinutesInUsec - : cfg_.meta_optimizer_timeout_ms() * 1000); - if (timeout_usec < 0) { - return OptimizeMainGraphAndFunctionLibrary(cluster, item, optimized_graph); - } - - GraphDef optimized_with_timeout; - Status status; - Notification done; - thread_pool_->Schedule( - [this, cluster, &done, &optimized_with_timeout, &item, &status]() { - status = this->OptimizeMainGraphAndFunctionLibrary( - cluster, item, &optimized_with_timeout); - done.Notify(); - }); - - const bool notified = WaitForNotificationWithTimeout(&done, timeout_usec); - if (notified && status.ok()) { - optimized_graph->Swap(&optimized_with_timeout); - } else { - *optimized_graph = item.graph; - if (!notified) { - this->Cancel(); - done.WaitForNotification(); - status = errors::DeadlineExceeded( - "Grappler MetaOptimizer timed out after ", - static_cast(timeout_usec) / (1000 * 1000), " seconds"); - LOG(WARNING) << status.error_message(); - } - } - return status; -} - void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, const GraphDef& pruned_graph, double result) { // Nothing to do for MetaOptimizer. diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 35d6a4559b..99a0a33ffa 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -20,7 +20,6 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -29,8 +28,9 @@ namespace grappler { // Run the other grappler optimizers based on the specified rewriter config. 
class MetaOptimizer : public GraphOptimizer { public: - MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg); - ~MetaOptimizer(); + MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) + : cpu_device_(cpu_device), cfg_(cfg) {} + ~MetaOptimizer() override = default; string name() const override { return "meta_optimizer"; }; @@ -65,18 +65,9 @@ class MetaOptimizer : public GraphOptimizer { Status OptimizeGraph(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph); - // Run optimization passes over the main graph and for functions in the - // function library. - Status OptimizeMainGraphAndFunctionLibrary(Cluster* cluster, - const GrapplerItem& item, - GraphDef* optimized_graph); - DeviceBase* const cpu_device_; // may be NULL RewriterConfig cfg_; - // Thread pool used for launching optimizers asynchronously. - std::unique_ptr thread_pool_; - struct OptimizerResult { string optimizer_name; string result; diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc index 7f1dd91f09..3f3f43382f 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc @@ -461,68 +461,6 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) { EXPECT_FALSE(allowed_optimizations_my_mul_2->non_differentiable_rewrites); } -class SleepingOptimizer : public CustomGraphOptimizer { - public: - SleepingOptimizer() {} - string name() const override { return "test_optimizer"; } - - Status Init( - const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { - return Status::OK(); - } - - Status Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph) override { - *optimized_graph = item.graph; - optimized_graph->add_node(); - sleep(1); - return Status::OK(); - } - - void Feedback(Cluster* cluster, const GrapplerItem& item, - const GraphDef& optimized_graph, 
double result) override {} -}; - -REGISTER_GRAPH_OPTIMIZER(SleepingOptimizer); - -TEST_F(MetaOptimizerTest, OptimizerTimesOut) { - TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); - GrapplerItem item; - CHECK(fake_input.NextItem(&item)); - - RewriterConfig rewriter_config; - rewriter_config.add_optimizers("SleepingOptimizer"); - rewriter_config.set_min_graph_nodes(-1); - rewriter_config.set_meta_optimizer_timeout_ms(1500); - rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO); - - MetaOptimizer optimizer(nullptr, rewriter_config); - GraphDef output; - const Status status = optimizer.Optimize(nullptr, item, &output); - EXPECT_EQ(status.error_message(), - "Grappler MetaOptimizer timed out after 1.5 seconds"); - // Make sure the graph was reverted to the original regardless of when the - // optimizer timed out. - CompareGraphs(item.graph, output); -} - -TEST_F(MetaOptimizerTest, OptimizerDoesNotTimeOut) { - TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); - GrapplerItem item; - CHECK(fake_input.NextItem(&item)); - - RewriterConfig rewriter_config; - rewriter_config.add_optimizers("SleepingOptimizer"); - rewriter_config.set_min_graph_nodes(-1); - rewriter_config.set_meta_optimizer_timeout_ms(1500); - rewriter_config.set_meta_optimizer_iterations(RewriterConfig::ONE); - MetaOptimizer optimizer(nullptr, rewriter_config); - GraphDef output; - const Status status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - EXPECT_EQ(item.graph.node_size() + 1, output.node_size()); -} - } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 7ccd54b818..8c31468ff5 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -83,10 +83,6 @@ message RewriterConfig { // Controls how many times we run the optimizers in meta optimizer 
(default // is once). NumIterationsType meta_optimizer_iterations = 12; - // Maximum number of milliseconds to spend optimizing a single graph before - // timing out. If equal to 0 the system picks a default (currently 5 minutes). - // If less than 0 the optimizer will never time out. - int64 meta_optimizer_timeout_ms = 20; // The minimum number of nodes in a graph to optimizer. For smaller graphs, // optimization is skipped. -- GitLab From 7ac4860203f85a5d42cac32d1585f91fc6345e9c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 11 Oct 2018 09:05:20 -0700 Subject: [PATCH 275/411] Raise the VLOG level for CalculateTensorSize information. PiperOrigin-RevId: 216705389 --- tensorflow/core/grappler/costs/utils.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index 87b74e2952..7d868a3679 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -209,7 +209,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) { // Can't infer the size if the rank is unknown. It has to be at least a // scalar though. if (shape.unknown_rank()) { - VLOG(1) << "CalculateTensorSize() -- unknown rank"; + VLOG(2) << "CalculateTensorSize() -- unknown rank"; return size; } @@ -217,7 +217,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) { for (int i = 0; i < shape.dim_size(); ++i) { if (shape.dim(i).size() < 0) { shape.mutable_dim(i)->set_size(1); - VLOG(1) << "CalculateTensorSize() -- unknown dim: " << i; + VLOG(2) << "CalculateTensorSize() -- unknown dim: " << i; } } -- GitLab From ac54c4591d1972472195ea06151946991dc550dc Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 11 Oct 2018 09:13:02 -0700 Subject: [PATCH 276/411] Account for the possibility of nested lambda functions. 
PiperOrigin-RevId: 216706627 --- .../pyct/static_analysis/activity.py | 38 ++++++++++--------- .../pyct/static_analysis/activity_test.py | 11 ++++++ .../python/autograph/pyct/transformer.py | 6 ++- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py index 0ce410d522..0b95b714fb 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py @@ -133,6 +133,14 @@ class Scope(object): self.parent.mark_returned(name) +class _Lambda(object): + + no_root = True + + def __init__(self): + self.args = set() + + class ActivityAnalyzer(transformer.Base): """Annotates nodes with local scope information. @@ -151,11 +159,8 @@ class ActivityAnalyzer(transformer.Base): # leaves in the AST, that is, they cannot contain other statements. self._in_return_statement = False self._in_aug_assign = False - self._in_lambda = False self._in_function_def_args = False - self._untracked_symbols = None - @property def _in_constructor(self): if len(self.enclosing_entities) > 1: @@ -179,11 +184,12 @@ class ActivityAnalyzer(transformer.Base): return qn = anno.getanno(node, anno.Basic.QN) - # Ignore any untracked symbols. - if self._untracked_symbols: - if qn in self._untracked_symbols: + # When inside a lambda, ignore any of the lambda's arguments. + # This includes attributes or slices of those arguments. + for l in self.state[_Lambda]: + if qn in l.args: return - if qn.owner_set & set(self._untracked_symbols): + if qn.owner_set & set(l.args): return if isinstance(node.ctx, gast.Store): @@ -199,11 +205,11 @@ class ActivityAnalyzer(transformer.Base): # In function defs have the meaning of defining a variable. 
self.scope.mark_modified(qn) self.scope.mark_param(qn, self.enclosing_entities[-1]) - elif self._in_lambda: - assert isinstance(self._untracked_symbols, set) - self._untracked_symbols.add(qn) + elif self.state[_Lambda].level: + # In lambdas, they are tracked separately. + self.state[_Lambda].args.add(qn) else: - # TODO(mdan): Is this case even possible? + # TODO(mdan): Is this case possible at all? raise NotImplementedError( 'Param "{}" outside a function arguments or lambda.'.format(qn)) else: @@ -317,12 +323,10 @@ class ActivityAnalyzer(transformer.Base): return parent def visit_Lambda(self, node): - assert not self._in_lambda or self._in_function_def_args - self._in_lambda = True - self._untracked_symbols = set() + assert not self._in_function_def_args + self.state[_Lambda].enter() node = self.generic_visit(node) - self._untracked_symbols = None - self._in_lambda = False + self.state[_Lambda].exit() return node def visit_arguments(self, node): @@ -339,7 +343,7 @@ class ActivityAnalyzer(transformer.Base): # A separate Scope tracks the actual function definition. 
self._enter_scope(True) - assert not self._in_function_def_args + assert not (self._in_function_def_args or self.state[_Lambda].level) self._in_function_def_args = True node.args = self.visit(node.args) self._in_function_def_args = False diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py index 678199970c..65267751c1 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py @@ -461,6 +461,17 @@ class ActivityAnalyzerTest(test.TestCase): self.assertScopeIs(body_scope, ('b', 'd'), ('a',)) self.assertSymbolSetsAre((), body_scope.params.keys(), 'params') + def test_lambda_nested(self): + + def test_fn(a, b, c, d, e): # pylint: disable=unused-argument + a = lambda a, b: d(lambda b: a + b + c) # pylint: disable=undefined-variable + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node.body[0] + body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE) + self.assertScopeIs(body_scope, ('c', 'd'), ('a',)) + self.assertSymbolSetsAre((), body_scope.params.keys(), 'params') + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/autograph/pyct/transformer.py b/tensorflow/python/autograph/pyct/transformer.py index 520f5038da..7827e6e9cd 100644 --- a/tensorflow/python/autograph/pyct/transformer.py +++ b/tensorflow/python/autograph/pyct/transformer.py @@ -92,7 +92,8 @@ class _StateStack(object): # the superclass' setattr. 
object.__setattr__(self, 'type', type_) object.__setattr__(self, '_stack', []) - self.enter() + if not hasattr(type_, 'no_root'): + self.enter() def enter(self): self._stack.append(self.type()) @@ -108,6 +109,9 @@ class _StateStack(object): def value(self): return self._stack[-1] + def __iter__(self): + return iter(self._stack) + def __getattr__(self, key): return getattr(self._stack[-1], key) -- GitLab From 0d054f20851f6156b1af26c35e68f6083bca8e13 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 11 Oct 2018 09:14:14 -0700 Subject: [PATCH 277/411] Have TensorFlow opensource code use tensorflow_estimator module if available. PiperOrigin-RevId: 216706771 --- tensorflow/BUILD | 1 - tensorflow/api_template.__init__.py | 17 +- tensorflow/contrib/estimator/BUILD | 337 +- tensorflow/contrib/estimator/__init__.py | 46 +- .../estimator/python/estimator/baseline.py | 92 +- .../python/estimator/baseline_test.py | 436 -- .../python/estimator/boosted_trees.py | 419 +- .../python/estimator/boosted_trees_test.py | 438 -- .../contrib/estimator/python/estimator/dnn.py | 156 +- .../python/estimator/dnn_linear_combined.py | 176 +- .../estimator/dnn_linear_combined_test.py | 227 - .../estimator/python/estimator/dnn_test.py | 171 - .../estimator/dnn_with_layer_annotations.py | 430 +- .../dnn_with_layer_annotations_test.py | 611 --- .../python/estimator/early_stopping.py | 498 +- .../python/estimator/early_stopping_test.py | 246 - .../estimator/python/estimator/export.py | 213 +- .../estimator/python/estimator/export_test.py | 373 -- .../estimator/python/estimator/exporter.py | 270 +- .../python/estimator/exporter_test.py | 206 - .../estimator/python/estimator/extenders.py | 349 +- .../python/estimator/extenders_test.py | 426 -- .../estimator/python/estimator/head.py | 969 +--- .../estimator/python/estimator/head_test.py | 1482 ------ .../estimator/python/estimator/hooks.py | 277 +- .../estimator/python/estimator/hooks_test.py | 403 -- .../estimator/python/estimator/linear.py | 
130 +- .../estimator/python/estimator/linear_test.py | 156 - .../estimator/python/estimator/logit_fns.py | 86 +- .../python/estimator/logit_fns_test.py | 95 - .../estimator/python/estimator/multi_head.py | 416 +- .../python/estimator/multi_head_test.py | 705 --- .../python/estimator/replicate_model_fn.py | 820 +--- .../estimator/replicate_model_fn_test.py | 1649 ------- .../contrib/estimator/python/estimator/rnn.py | 572 +-- .../estimator/python/estimator/rnn_test.py | 1185 ----- .../python/estimator/saved_model_estimator.py | 441 +- .../estimator/saved_model_estimator_test.py | 369 -- tensorflow/python/estimator/BUILD | 424 +- tensorflow/python/estimator/__init__.py | 17 +- .../python/estimator/canned/__init__.py | 32 + .../python/estimator/canned/baseline.py | 366 +- .../python/estimator/canned/baseline_test.py | 1558 ------- .../python/estimator/canned/boosted_trees.py | 1558 +------ .../estimator/canned/boosted_trees_test.py | 2549 ----------- .../estimator/canned/boosted_trees_utils.py | 72 +- .../canned/boosted_trees_utils_test.py | 187 - tensorflow/python/estimator/canned/dnn.py | 652 +-- .../estimator/canned/dnn_linear_combined.py | 626 +-- .../canned/dnn_linear_combined_test.py | 1123 ----- .../python/estimator/canned/dnn_test.py | 580 --- .../estimator/canned/dnn_testing_utils.py | 2068 +-------- tensorflow/python/estimator/canned/head.py | 1593 +------ .../python/estimator/canned/head_test.py | 4056 ----------------- tensorflow/python/estimator/canned/linear.py | 535 +-- .../python/estimator/canned/linear_test.py | 255 -- .../estimator/canned/linear_testing_utils.py | 2349 +--------- .../python/estimator/canned/metric_keys.py | 46 +- .../python/estimator/canned/optimizers.py | 72 +- .../estimator/canned/optimizers_test.py | 103 - .../python/estimator/canned/parsing_utils.py | 296 +- .../estimator/canned/parsing_utils_test.py | 211 - .../estimator/canned/prediction_keys.py | 29 +- tensorflow/python/estimator/estimator.py | 2167 +-------- 
tensorflow/python/estimator/estimator_lib.py | 45 +- tensorflow/python/estimator/estimator_test.py | 3280 ------------- .../python/estimator/export/__init__.py | 32 + tensorflow/python/estimator/export/export.py | 628 +-- .../python/estimator/export/export_lib.py | 24 +- .../python/estimator/export/export_output.py | 407 +- .../estimator/export/export_output_test.py | 397 -- .../python/estimator/export/export_test.py | 802 ---- tensorflow/python/estimator/exporter.py | 498 +- tensorflow/python/estimator/exporter_test.py | 400 -- tensorflow/python/estimator/gc.py | 199 +- tensorflow/python/estimator/gc_test.py | 156 - .../python/estimator/inputs/__init__.py | 32 + tensorflow/python/estimator/inputs/inputs.py | 19 +- .../python/estimator/inputs/numpy_io.py | 217 +- .../python/estimator/inputs/numpy_io_test.py | 620 --- .../python/estimator/inputs/pandas_io.py | 147 +- .../python/estimator/inputs/pandas_io_test.py | 320 -- .../estimator/inputs/queues/__init__.py | 32 + .../inputs/queues/feeding_functions.py | 507 +-- .../inputs/queues/feeding_functions_test.py | 391 -- .../inputs/queues/feeding_queue_runner.py | 172 +- .../queues/feeding_queue_runner_test.py | 140 - tensorflow/python/estimator/keras.py | 492 +- tensorflow/python/estimator/keras_test.py | 805 ---- tensorflow/python/estimator/model_fn.py | 510 +-- tensorflow/python/estimator/model_fn_test.py | 661 --- tensorflow/python/estimator/run_config.py | 907 +--- .../python/estimator/run_config_test.py | 1235 ----- tensorflow/python/estimator/training.py | 1065 +---- tensorflow/python/estimator/training_test.py | 2198 --------- tensorflow/python/estimator/util.py | 143 +- tensorflow/python/estimator/util_test.py | 102 - tensorflow/python/feature_column/BUILD | 2 +- tensorflow/python/tools/api/generator/BUILD | 18 - ...rflow.estimator.-baseline-classifier.pbtxt | 4 +- ...orflow.estimator.-baseline-regressor.pbtxt | 4 +- .../tensorflow.estimator.-best-exporter.pbtxt | 4 +- 
....estimator.-boosted-trees-classifier.pbtxt | 6 +- ...w.estimator.-boosted-trees-regressor.pbtxt | 6 +- ...nsorflow.estimator.-d-n-n-classifier.pbtxt | 4 +- ...or.-d-n-n-linear-combined-classifier.pbtxt | 4 +- ...tor.-d-n-n-linear-combined-regressor.pbtxt | 4 +- ...ensorflow.estimator.-d-n-n-regressor.pbtxt | 4 +- ...tensorflow.estimator.-estimator-spec.pbtxt | 4 +- .../v1/tensorflow.estimator.-estimator.pbtxt | 2 +- .../v1/tensorflow.estimator.-eval-spec.pbtxt | 4 +- .../v1/tensorflow.estimator.-exporter.pbtxt | 2 +- ...tensorflow.estimator.-final-exporter.pbtxt | 4 +- ...ensorflow.estimator.-latest-exporter.pbtxt | 4 +- ...sorflow.estimator.-linear-classifier.pbtxt | 4 +- ...nsorflow.estimator.-linear-regressor.pbtxt | 4 +- .../v1/tensorflow.estimator.-mode-keys.pbtxt | 2 +- .../v1/tensorflow.estimator.-run-config.pbtxt | 2 +- .../v1/tensorflow.estimator.-train-spec.pbtxt | 4 +- ...rflow.estimator.-warm-start-settings.pbtxt | 4 +- ...imator.export.-classification-output.pbtxt | 4 +- ...flow.estimator.export.-export-output.pbtxt | 2 +- ...low.estimator.export.-predict-output.pbtxt | 4 +- ....estimator.export.-regression-output.pbtxt | 4 +- ...mator.export.-serving-input-receiver.pbtxt | 4 +- ...xport.-tensor-serving-input-receiver.pbtxt | 4 +- ...rflow.estimator.-baseline-classifier.pbtxt | 4 +- ...orflow.estimator.-baseline-regressor.pbtxt | 4 +- .../tensorflow.estimator.-best-exporter.pbtxt | 4 +- ....estimator.-boosted-trees-classifier.pbtxt | 6 +- ...w.estimator.-boosted-trees-regressor.pbtxt | 6 +- ...nsorflow.estimator.-d-n-n-classifier.pbtxt | 4 +- ...or.-d-n-n-linear-combined-classifier.pbtxt | 4 +- ...tor.-d-n-n-linear-combined-regressor.pbtxt | 4 +- ...ensorflow.estimator.-d-n-n-regressor.pbtxt | 4 +- ...tensorflow.estimator.-estimator-spec.pbtxt | 4 +- .../v2/tensorflow.estimator.-estimator.pbtxt | 2 +- .../v2/tensorflow.estimator.-eval-spec.pbtxt | 4 +- .../v2/tensorflow.estimator.-exporter.pbtxt | 2 +- ...tensorflow.estimator.-final-exporter.pbtxt | 
4 +- ...ensorflow.estimator.-latest-exporter.pbtxt | 4 +- ...sorflow.estimator.-linear-classifier.pbtxt | 4 +- ...nsorflow.estimator.-linear-regressor.pbtxt | 4 +- .../v2/tensorflow.estimator.-mode-keys.pbtxt | 2 +- .../v2/tensorflow.estimator.-run-config.pbtxt | 2 +- .../v2/tensorflow.estimator.-train-spec.pbtxt | 4 +- ...rflow.estimator.-warm-start-settings.pbtxt | 4 +- ...imator.export.-classification-output.pbtxt | 4 +- ...flow.estimator.export.-export-output.pbtxt | 2 +- ...low.estimator.export.-predict-output.pbtxt | 4 +- ....estimator.export.-regression-output.pbtxt | 4 +- ...mator.export.-serving-input-receiver.pbtxt | 4 +- ...xport.-tensor-serving-input-receiver.pbtxt | 4 +- tensorflow/tools/pip_package/setup.py | 1 + 154 files changed, 918 insertions(+), 56307 deletions(-) delete mode 100644 tensorflow/contrib/estimator/python/estimator/baseline_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/early_stopping_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/export_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/exporter_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/extenders_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/head_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/hooks_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/linear_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/logit_fns_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/multi_head_test.py delete mode 100644 
tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/rnn_test.py delete mode 100644 tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py delete mode 100644 tensorflow/python/estimator/canned/baseline_test.py delete mode 100644 tensorflow/python/estimator/canned/boosted_trees_test.py delete mode 100644 tensorflow/python/estimator/canned/boosted_trees_utils_test.py delete mode 100644 tensorflow/python/estimator/canned/dnn_linear_combined_test.py delete mode 100644 tensorflow/python/estimator/canned/dnn_test.py delete mode 100644 tensorflow/python/estimator/canned/head_test.py delete mode 100644 tensorflow/python/estimator/canned/linear_test.py delete mode 100644 tensorflow/python/estimator/canned/optimizers_test.py delete mode 100644 tensorflow/python/estimator/canned/parsing_utils_test.py delete mode 100644 tensorflow/python/estimator/estimator_test.py delete mode 100644 tensorflow/python/estimator/export/export_output_test.py delete mode 100644 tensorflow/python/estimator/export/export_test.py delete mode 100644 tensorflow/python/estimator/exporter_test.py delete mode 100644 tensorflow/python/estimator/gc_test.py delete mode 100644 tensorflow/python/estimator/inputs/numpy_io_test.py delete mode 100644 tensorflow/python/estimator/inputs/pandas_io_test.py delete mode 100644 tensorflow/python/estimator/inputs/queues/feeding_functions_test.py delete mode 100644 tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py delete mode 100644 tensorflow/python/estimator/keras_test.py delete mode 100644 tensorflow/python/estimator/model_fn_test.py delete mode 100644 tensorflow/python/estimator/run_config_test.py delete mode 100644 tensorflow/python/estimator/training_test.py delete mode 100644 tensorflow/python/estimator/util_test.py diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 9b62a50452..8f4927324b 100644 --- a/tensorflow/BUILD +++ 
b/tensorflow/BUILD @@ -553,7 +553,6 @@ gen_api_init_files( py_library( name = "tensorflow_py", - srcs = ["//tensorflow/python/estimator/api:estimator_python_api_gen"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index 2de740e145..65172fd74a 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -23,18 +23,11 @@ import os as _os # pylint: disable=g-bad-import-order from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import -try: - # Add `estimator` attribute to allow access to estimator APIs via - # "tf.estimator..." - from tensorflow.python.estimator.api import estimator # pylint: disable=g-import-not-at-top - - # Add `estimator` to the __path__ to allow "from tensorflow.estimator..." - # style imports. - from tensorflow.python.estimator import api as estimator_api # pylint: disable=g-import-not-at-top - __path__ += [_os.path.dirname(estimator_api.__file__)] - del estimator_api -except (ImportError, AttributeError): - print('tf.estimator package not installed.') +from tensorflow.python.tools import component_api_helper +component_api_helper.package_hook( + parent_package_str=__name__, + child_package_str=('tensorflow_estimator.python.estimator.api.estimator')) +del component_api_helper # API IMPORTS PLACEHOLDER diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 1ea00fb7f3..8b99158b30 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -8,6 +8,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_test") +# PLACEHOLDER PIP REQUIREMENTS py_library( name = "estimator_py", @@ -20,6 +21,7 @@ py_library( ":dnn_linear_combined", ":dnn_with_layer_annotations", ":early_stopping", + ":expect_tensorflow_estimator_installed", ":export", ":exporter", 
":extenders", @@ -32,6 +34,7 @@ py_library( ":rnn", ":saved_model_estimator", "//tensorflow:tensorflow_py_no_contrib", + "//tensorflow/python/estimator:estimator_py", ], ) @@ -40,98 +43,41 @@ py_library( srcs = ["python/estimator/baseline.py"], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow/python/estimator", "//tensorflow/python/estimator:baseline", ], ) -py_test( - name = "baseline_test", - size = "small", - srcs = ["python/estimator/baseline_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", - ], - deps = [ - ":baseline", - ":head", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:export_export", - "//tensorflow/python/estimator:metric_keys", - "//tensorflow/python/estimator:numpy_io", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_library( name = "boosted_trees", srcs = ["python/estimator/boosted_trees.py"], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow/python/estimator", "//tensorflow/python/estimator:boosted_trees", ], ) -py_test( - name = "boosted_trees_test", - size = "medium", - srcs = ["python/estimator/boosted_trees_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", - ], - deps = [ - ":boosted_trees", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:numpy_io", - "//third_party/py/numpy", - ], -) - py_library( name = "dnn", srcs = ["python/estimator/dnn.py"], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator", "//tensorflow/python/estimator:dnn", ], ) -py_test( - name = "dnn_test", - size = "medium", - srcs = ["python/estimator/dnn_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", - "optonly", # times out http://b/79220679 - ], - deps = [ - ":dnn", - ":head", - "//tensorflow:tensorflow_py_no_contrib", - 
"//tensorflow/python/estimator:dnn_testing_utils", - "//tensorflow/python/estimator:export_export", - "//tensorflow/python/estimator:numpy_io", - "//tensorflow/python/estimator:prediction_keys", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_library( name = "dnn_with_layer_annotations", srcs = ["python/estimator/dnn_with_layer_annotations.py"], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator", "//tensorflow/python/estimator:head", @@ -140,64 +86,18 @@ py_library( ], ) -py_test( - name = "dnn_with_layer_annotations_test", - size = "medium", - srcs = ["python/estimator/dnn_with_layer_annotations_test.py"], - shard_count = 4, - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", # b/67510291 - ], - deps = [ - ":dnn_with_layer_annotations", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:dnn", - "//tensorflow/python/estimator:dnn_testing_utils", - "//tensorflow/python/estimator:export_export", - "//tensorflow/python/estimator:numpy_io", - "//tensorflow/python/estimator:pandas_io", - "//tensorflow/python/estimator:prediction_keys", - "@six_archive//:six", - ], -) - py_library( name = "dnn_linear_combined", srcs = ["python/estimator/dnn_linear_combined.py"], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator", "//tensorflow/python/estimator:dnn_linear_combined", ], ) -py_test( - name = "dnn_linear_combined_test", - size = "medium", - srcs = ["python/estimator/dnn_linear_combined_test.py"], - shard_count = 3, - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", - ], - deps = [ - ":dnn_linear_combined", - ":head", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:dnn_testing_utils", - "//tensorflow/python/estimator:export_export", - 
"//tensorflow/python/estimator:linear_testing_utils", - "//tensorflow/python/estimator:numpy_io", - "//tensorflow/python/estimator:prediction_keys", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_library( name = "extenders", srcs = [ @@ -205,6 +105,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator", "//tensorflow/python/estimator:model_fn", @@ -213,23 +114,6 @@ py_library( ], ) -py_test( - name = "extenders_test", - size = "medium", - srcs = ["python/estimator/extenders_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], # b/62863147 - deps = [ - ":extenders", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/predictor", - "//tensorflow/python/estimator:estimator_py", - "//tensorflow/python/estimator:linear", - "//third_party/py/numpy", - ], -) - py_library( name = "export", srcs = [ @@ -237,22 +121,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python/estimator:model_fn", - ], -) - -py_test( - name = "export_test", - size = "medium", - srcs = ["python/estimator/export_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], # b/62863147 - deps = [ - ":export", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator", - "//tensorflow/python/estimator:export_export", - "//tensorflow/python/estimator:export_output", + ":expect_tensorflow_estimator_installed", "//tensorflow/python/estimator:model_fn", ], ) @@ -264,24 +133,12 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator:exporter", ], ) -py_test( - name = "exporter_test", - size = "medium", - srcs = ["python/estimator/exporter_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":exporter", - "//tensorflow:tensorflow_py_no_contrib", - 
"//tensorflow/python/estimator", - "//tensorflow/python/estimator:exporter", - ], -) - py_library( name = "head", srcs = [ @@ -289,6 +146,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator:export_output", "//tensorflow/python/estimator:head", @@ -298,22 +156,6 @@ py_library( ], ) -py_test( - name = "head_test", - size = "medium", - srcs = ["python/estimator/head_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":head", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:metric_keys", - "//tensorflow/python/estimator:model_fn", - "//tensorflow/python/estimator:prediction_keys", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_library( name = "hooks", srcs = [ @@ -321,58 +163,23 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator:estimator_py", ], ) -py_test( - name = "hooks_test", - size = "medium", - srcs = ["python/estimator/hooks_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ - ":hooks", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:estimator_py", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_library( name = "linear", srcs = ["python/estimator/linear.py"], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow/python/estimator", "//tensorflow/python/estimator:linear", ], ) -py_test( - name = "linear_test", - size = "medium", - srcs = ["python/estimator/linear_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", - ], - deps = [ - ":head", - ":linear", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:export_export", - "//tensorflow/python/estimator:linear_testing_utils", - "//tensorflow/python/estimator:numpy_io", - 
"//tensorflow/python/estimator:prediction_keys", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_library( name = "logit_fns", srcs = [ @@ -380,24 +187,13 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator:dnn", "//tensorflow/python/estimator:linear", ], ) -py_test( - name = "logit_fns_test", - size = "small", - srcs = ["python/estimator/logit_fns_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":logit_fns", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:model_fn", - ], -) - py_library( name = "multi_head", srcs = [ @@ -405,6 +201,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator:export_output", "//tensorflow/python/estimator:head", @@ -414,23 +211,6 @@ py_library( ], ) -py_test( - name = "multi_head_test", - size = "small", - srcs = ["python/estimator/multi_head_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":head", - ":multi_head", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator:metric_keys", - "//tensorflow/python/estimator:model_fn", - "//tensorflow/python/estimator:prediction_keys", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_library( name = "replicate_model_fn", srcs = [ @@ -438,6 +218,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator:export_output", "//tensorflow/python/estimator:model_fn", @@ -446,35 +227,12 @@ py_library( ], ) -cuda_py_test( - name = "replicate_model_fn_test", - size = "medium", - srcs = ["python/estimator/replicate_model_fn_test.py"], - additional_deps = [ - "@absl_py//absl/testing:parameterized", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator", - 
"//tensorflow/python/estimator:dnn", - "//tensorflow/python/estimator:export_export", - "//tensorflow/python/estimator:export_output", - "//tensorflow/python/estimator:model_fn", - "//tensorflow/python/estimator:numpy_io", - "//tensorflow/python/estimator:optimizers", - "//tensorflow/python/estimator:prediction_keys", - ":replicate_model_fn", - ], - tags = [ - "manual", - "multi_gpu", - "notap", - ], -) - py_library( name = "rnn", srcs = ["python/estimator/rnn.py"], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", ":extenders", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/contrib/feature_column:feature_column_py", @@ -485,55 +243,22 @@ py_library( ], ) -py_test( - name = "rnn_test", - size = "medium", - srcs = ["python/estimator/rnn_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "noasan", # times out - "notsan", - "optonly", # times out http://b/79220679 - ], - deps = [ - ":head", - ":rnn", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/contrib/data", - "//tensorflow/python/estimator:numpy_io", - "//tensorflow/python/estimator:parsing_utils", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_library( name = "early_stopping", srcs = ["python/estimator/early_stopping.py"], srcs_version = "PY2AND3", deps = [ + ":expect_tensorflow_estimator_installed", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator", ], ) -py_test( - name = "early_stopping_test", - srcs = ["python/estimator/early_stopping_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":early_stopping", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator", - "@absl_py//absl/testing:parameterized", - ], -) - py_library( name = "saved_model_estimator", srcs = ["python/estimator/saved_model_estimator.py"], deps = [ + ":expect_tensorflow_estimator_installed", ":export", "//tensorflow:tensorflow_py_no_contrib", "//tensorflow/python/estimator", @@ -542,21 +267,9 @@ py_library( ], ) 
-py_test( - name = "saved_model_estimator_test", - size = "medium", - srcs = ["python/estimator/saved_model_estimator_test.py"], - srcs_version = "PY2AND3", - tags = [ - "notsan", - ], - deps = [ - ":export", - ":saved_model_estimator", - "//tensorflow:tensorflow_py_no_contrib", - "//tensorflow/python/estimator", - "//tensorflow/python/estimator:export_export", - "//tensorflow/python/estimator:export_output", - "//tensorflow/python/estimator:model_fn", - ], +py_library( + name = "expect_tensorflow_estimator_installed", + # This is a dummy rule used as a dependency in open-source. + # We expect tensorflow_estimator to already be installed. + visibility = ["//visibility:public"], ) diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py index 419609b1af..fc7d94e4fc 100644 --- a/tensorflow/contrib/estimator/__init__.py +++ b/tensorflow/contrib/estimator/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,33 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experimental utilities re:tf.estimator.*.""" +"""estimator python module. + +Importing from tensorflow.python.estimator +is unsupported and will soon break! 
+""" + +# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.estimator.python.estimator.baseline import * -from tensorflow.contrib.estimator.python.estimator.boosted_trees import * -from tensorflow.contrib.estimator.python.estimator.dnn import * -from tensorflow.contrib.estimator.python.estimator.dnn_with_layer_annotations import * -from tensorflow.contrib.estimator.python.estimator.dnn_linear_combined import * -from tensorflow.contrib.estimator.python.estimator.early_stopping import * -from tensorflow.contrib.estimator.python.estimator.export import * -from tensorflow.contrib.estimator.python.estimator.extenders import * -from tensorflow.contrib.estimator.python.estimator.head import * -from tensorflow.contrib.estimator.python.estimator.hooks import * -from tensorflow.contrib.estimator.python.estimator.linear import * -from tensorflow.contrib.estimator.python.estimator.logit_fns import * -from tensorflow.contrib.estimator.python.estimator.multi_head import * -from tensorflow.contrib.estimator.python.estimator.replicate_model_fn import * -from tensorflow.contrib.estimator.python.estimator.rnn import * -from tensorflow.contrib.estimator.python.estimator.saved_model_estimator import * -from tensorflow.python.estimator.export.export import * +# Importing from tensorflow.python.estimator +# is unsupported and will soon break! + +from tensorflow_estimator.contrib import estimator + +# Fixes remove_undocumented not working as intended. +# +# Problem is that when the below import happens (for first time, +# Python only imports things once), Python sets attribute named +# 'python' to this package. If this first import happens +# after the call to remove_undocumented, then the 'python' +# attribute won't be removed. 
+import tensorflow.contrib.estimator.python + +# Include attrs that start with single underscore. +estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')] +from tensorflow_estimator.contrib.estimator import * from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import _allowed_symbols = [ 'add_metrics', diff --git a/tensorflow/contrib/estimator/python/estimator/baseline.py b/tensorflow/contrib/estimator/python/estimator/baseline.py index beffbee730..5a684befb6 100644 --- a/tensorflow/contrib/estimator/python/estimator/baseline.py +++ b/tensorflow/contrib/estimator/python/estimator/baseline.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,87 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Baseline estimators.""" +"""baseline python module. + +Importing from tensorflow.python.estimator +is unsupported and will soon break! +""" +# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import + from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.estimator import estimator -from tensorflow.python.estimator.canned import baseline - - -class BaselineEstimator(estimator.Estimator): - """An estimator that can establish a simple baseline. - - The estimator uses a user-specified head. - - This estimator ignores feature values and will learn to predict the average - value of each label. E.g. 
for single-label classification problems, this will - predict the probability distribution of the classes as seen in the labels. - For multi-label classification problems, it will predict the ratio of examples - that contain each class. - - Example: - - ```python - - # Build baseline multi-label classifier. - estimator = BaselineEstimator( - head=tf.contrib.estimator.multi_label_head(n_classes=3)) - - # Input builders - def input_fn_train: # returns x, y (where y represents label's class index). - pass - - def input_fn_eval: # returns x, y (where y represents label's class index). - pass - - # Fit model. - estimator.train(input_fn=input_fn_train) - - # Evaluates cross entropy between the test and train labels. - loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] - - # For each class, predicts the ratio of training examples that contain the - # class. - predictions = classifier.predict(new_samples) - - ``` - - Input of `train` and `evaluate` should have following features, - otherwise there will be a `KeyError`: - - * if `weight_column` passed to the `head` constructor is not `None`, a feature - with `key=weight_column` whose value is a `Tensor`. - """ +from tensorflow_estimator.contrib.estimator.python.estimator import baseline - def __init__(self, - head, - model_dir=None, - optimizer='Ftrl', - config=None): - """Initializes a BaselineEstimator instance. +# Include attrs that start with single underscore. +baseline.__all__ = [s for s in dir(baseline) if not s.startswith('__')] - Args: - head: A `_Head` instance constructed with a method such as - `tf.contrib.estimator.multi_label_head`. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator to - continue training a previously saved model. - optimizer: String, `tf.Optimizer` object, or callable that creates the - optimizer to use for training. 
If not specified, will use - `FtrlOptimizer` with a default learning rate of 0.3. - config: `RunConfig` object to configure the runtime settings. - """ - def _model_fn(features, labels, mode, config): - return baseline._baseline_model_fn( # pylint: disable=protected-access - features=features, - labels=labels, - mode=mode, - head=head, - optimizer=optimizer, - config=config) - super(BaselineEstimator, self).__init__( - model_fn=_model_fn, - model_dir=model_dir, - config=config) +# pylint: disable=g-import-not-at-top +from tensorflow_estimator.contrib.estimator.python.estimator.baseline import * diff --git a/tensorflow/contrib/estimator/python/estimator/baseline_test.py b/tensorflow/contrib/estimator/python/estimator/baseline_test.py deleted file mode 100644 index 513feb03b6..0000000000 --- a/tensorflow/contrib/estimator/python/estimator/baseline_test.py +++ /dev/null @@ -1,436 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for baseline.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import shutil -import tempfile - -import numpy as np -import six - -from tensorflow.contrib.estimator.python.estimator import baseline -from tensorflow.contrib.estimator.python.estimator import head as head_lib -from tensorflow.python.client import session as tf_session -from tensorflow.python.estimator.canned import metric_keys -from tensorflow.python.estimator.export import export -from tensorflow.python.estimator.inputs import numpy_io -from tensorflow.python.feature_column import feature_column as feature_column_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variables -from tensorflow.python.ops.losses import losses -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow.python.summary.writer import writer_cache -from tensorflow.python.training import checkpoint_utils -from tensorflow.python.training import optimizer -from tensorflow.python.training import saver - -# Names of variables created by model. 
-BIAS_NAME = 'baseline/bias' - - -def assert_close(expected, actual, rtol=1e-04, name='assert_close'): - with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope: - expected = ops.convert_to_tensor(expected, name='expected') - actual = ops.convert_to_tensor(actual, name='actual') - rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected) - rtol = ops.convert_to_tensor(rtol, name='rtol') - return check_ops.assert_less( - rdiff, - rtol, - data=('Condition expected =~ actual did not hold element-wise:' - 'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff, - 'rtol = ', rtol,), - name=scope) - - -def save_variables_to_ckpt(model_dir): - init_all_op = [variables.global_variables_initializer()] - with tf_session.Session() as sess: - sess.run(init_all_op) - saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) - - -def _baseline_estimator_fn( - weight_column=None, label_dimension=1, *args, **kwargs): - """Returns a BaselineEstimator that uses regression_head.""" - return baseline.BaselineEstimator( - head=head_lib.regression_head( - weight_column=weight_column, label_dimension=label_dimension, - # Tests in core (from which this test inherits) test the sum loss. 
- loss_reduction=losses.Reduction.SUM), - *args, **kwargs) - - -class BaselineEstimatorEvaluationTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def test_evaluation_batch(self): - """Tests evaluation for batch_size==2.""" - with ops.Graph().as_default(): - variables.Variable([13.0], name=BIAS_NAME) - variables.Variable( - 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir) - eval_metrics = baseline_estimator.evaluate( - input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1) - - # Logit is bias = 13, while label is 10. - # Loss per example is 3**2 = 9. - # Training loss is the sum over batch = 9 + 9 = 18 - # Average loss is the average over batch = 9 - self.assertDictEqual({ - metric_keys.MetricKeys.LOSS: 18., - metric_keys.MetricKeys.LOSS_MEAN: 9., - metric_keys.MetricKeys.PREDICTION_MEAN: 13., - metric_keys.MetricKeys.LABEL_MEAN: 10., - ops.GraphKeys.GLOBAL_STEP: 100 - }, eval_metrics) - - def test_evaluation_weights(self): - """Tests evaluation with weights.""" - with ops.Graph().as_default(): - variables.Variable([13.0], name=BIAS_NAME) - variables.Variable( - 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - def _input_fn(): - features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))} - labels = ((10.,), (10.,)) - return features, labels - - baseline_estimator = _baseline_estimator_fn( - weight_column='weights', - model_dir=self._model_dir) - eval_metrics = baseline_estimator.evaluate(input_fn=_input_fn, steps=1) - - # Logit is bias = 13, while label is 10. - # Loss per example is 3**2 = 9. 
- # Training loss is the weighted sum over batch = 9 + 2*9 = 27 - # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9 - self.assertDictEqual({ - metric_keys.MetricKeys.LOSS: 27., - metric_keys.MetricKeys.LOSS_MEAN: 9., - metric_keys.MetricKeys.PREDICTION_MEAN: 13., - metric_keys.MetricKeys.LABEL_MEAN: 10., - ops.GraphKeys.GLOBAL_STEP: 100 - }, eval_metrics) - - def test_evaluation_for_multi_dimensions(self): - label_dim = 2 - with ops.Graph().as_default(): - variables.Variable([46.0, 58.0], name=BIAS_NAME) - variables.Variable(100, name='global_step', dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_estimator = _baseline_estimator_fn( - label_dimension=label_dim, - model_dir=self._model_dir) - input_fn = numpy_io.numpy_input_fn( - x={ - 'age': np.array([[2., 4., 5.]]), - }, - y=np.array([[46., 58.]]), - batch_size=1, - num_epochs=None, - shuffle=False) - eval_metrics = baseline_estimator.evaluate(input_fn=input_fn, steps=1) - - self.assertItemsEqual( - (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN, - metric_keys.MetricKeys.PREDICTION_MEAN, - metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP), - eval_metrics.keys()) - - # Logit is bias which is [46, 58] - self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS]) - - -class BaselineEstimatorPredictTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def test_1d(self): - """Tests predict when all variables are one-dimensional.""" - with ops.Graph().as_default(): - variables.Variable([.2], name=BIAS_NAME) - variables.Variable(100, name='global_step', dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir) - - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': np.array([[2.]])}, - y=None, - batch_size=1, - 
num_epochs=1, - shuffle=False) - predictions = baseline_estimator.predict(input_fn=predict_input_fn) - predicted_scores = list([x['predictions'] for x in predictions]) - # x * weight + bias = 2. * 10. + .2 = 20.2 - self.assertAllClose([[.2]], predicted_scores) - - def testMultiDim(self): - """Tests predict when all variables are multi-dimenstional.""" - batch_size = 2 - label_dimension = 3 - with ops.Graph().as_default(): - variables.Variable( # shape=[label_dimension] - [.2, .4, .6], name=BIAS_NAME) - variables.Variable(100, name='global_step', dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_estimator = _baseline_estimator_fn( - label_dimension=label_dimension, - model_dir=self._model_dir) - - predict_input_fn = numpy_io.numpy_input_fn( - # x shape=[batch_size, x_dim] - x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])}, - y=None, - batch_size=batch_size, - num_epochs=1, - shuffle=False) - predictions = baseline_estimator.predict(input_fn=predict_input_fn) - predicted_scores = list([x['predictions'] for x in predictions]) - # score = bias, shape=[batch_size, label_dimension] - self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]], - predicted_scores) - - -class BaselineEstimatorIntegrationTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, - input_dimension, label_dimension, prediction_length): - feature_columns = [ - feature_column_lib.numeric_column('x', shape=(input_dimension,)) - ] - est = _baseline_estimator_fn( - label_dimension=label_dimension, - model_dir=self._model_dir) - - # TRAIN - # learn y = x - est.train(train_input_fn, steps=200) - - # EVALUTE - scores = est.evaluate(eval_input_fn) - self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP]) - self.assertIn(metric_keys.MetricKeys.LOSS, 
six.iterkeys(scores)) - - # PREDICT - predictions = np.array( - [x['predictions'] for x in est.predict(predict_input_fn)]) - self.assertAllEqual((prediction_length, label_dimension), predictions.shape) - - # EXPORT - feature_spec = feature_column_lib.make_parse_example_spec(feature_columns) - serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( - feature_spec) - export_dir = est.export_savedmodel(tempfile.mkdtemp(), - serving_input_receiver_fn) - self.assertTrue(gfile.Exists(export_dir)) - - def test_numpy_input_fn(self): - """Tests complete flow with numpy_input_fn.""" - label_dimension = 2 - input_dimension = label_dimension - batch_size = 10 - prediction_length = batch_size - data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) - data = data.reshape(batch_size, label_dimension) - - train_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=data, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - eval_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=data, - batch_size=batch_size, - num_epochs=1, - shuffle=False) - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=None, - batch_size=batch_size, - num_epochs=1, - shuffle=False) - - self._test_complete_flow( - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - predict_input_fn=predict_input_fn, - input_dimension=input_dimension, - label_dimension=label_dimension, - prediction_length=prediction_length) - - -class BaselineEstimatorTrainingTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def _mock_optimizer(self, expected_loss=None): - expected_var_names = [ - '%s:0' % BIAS_NAME - ] - - def _minimize(loss, global_step=None, var_list=None): - trainable_vars = var_list or ops.get_collection( - ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertItemsEqual(expected_var_names, - 
[var.name for var in trainable_vars]) - - # Verify loss. We can't check the value directly, so we add an assert op. - self.assertEquals(0, loss.shape.ndims) - if expected_loss is None: - if global_step is not None: - return state_ops.assign_add(global_step, 1).op - return control_flow_ops.no_op() - assert_loss = assert_close( - math_ops.to_float(expected_loss, name='expected'), - loss, - name='assert_loss') - with ops.control_dependencies((assert_loss,)): - if global_step is not None: - return state_ops.assign_add(global_step, 1).op - return control_flow_ops.no_op() - - mock_optimizer = test.mock.NonCallableMock( - spec=optimizer.Optimizer, - wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) - mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize) - - # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. - # So, return mock_optimizer itself for deepcopy. - mock_optimizer.__deepcopy__ = lambda _: mock_optimizer - return mock_optimizer - - def _assert_checkpoint(self, - label_dimension, - expected_global_step, - expected_bias=None): - shapes = { - name: shape - for (name, shape) in checkpoint_utils.list_variables(self._model_dir) - } - - self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP]) - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(self._model_dir, - ops.GraphKeys.GLOBAL_STEP)) - - self.assertEqual([label_dimension], shapes[BIAS_NAME]) - if expected_bias is not None: - self.assertEqual(expected_bias, - checkpoint_utils.load_variable(self._model_dir, - BIAS_NAME)) - - def testFromScratch(self): - # Create BaselineRegressor. - label = 5. - age = 17 - # loss = (logits - label)^2 = (0 - 5.)^2 = 25. - mock_optimizer = self._mock_optimizer(expected_loss=25.) - baseline_estimator = _baseline_estimator_fn( - model_dir=self._model_dir, - optimizer=mock_optimizer) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. 
- num_steps = 10 - baseline_estimator.train( - input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - self._assert_checkpoint( - label_dimension=1, - expected_global_step=num_steps, - expected_bias=[0.]) - - def testFromCheckpoint(self): - # Create initial checkpoint. - bias = 7.0 - initial_global_step = 100 - with ops.Graph().as_default(): - variables.Variable([bias], name=BIAS_NAME) - variables.Variable( - initial_global_step, - name=ops.GraphKeys.GLOBAL_STEP, - dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - # logits = bias = 6. - # loss = (logits - label)^2 = (7 - 5)^2 = 4 - mock_optimizer = self._mock_optimizer(expected_loss=4.) - baseline_estimator = _baseline_estimator_fn( - model_dir=self._model_dir, - optimizer=mock_optimizer) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. - num_steps = 10 - baseline_estimator.train( - input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - self._assert_checkpoint( - label_dimension=1, - expected_global_step=initial_global_step + num_steps, - expected_bias=[bias]) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index b131ed4f12..e2a7d01530 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -12,414 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Boosted Trees estimators.""" +"""boosted_trees python module. + +Importing from tensorflow.python.estimator +is unsupported and will soon break! 
+""" +# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import + from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.estimator import estimator -from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees -from tensorflow.python.estimator.canned import head as head_lib - - -def _validate_input_fn_and_repeat_dataset(train_input_fn): - """Validates whether the input_fn is valid, and repeat() if tf.Dataset.""" - def _input_fn(): - result_input_fn = train_input_fn() - if isinstance(result_input_fn, dataset_ops.Dataset): - return result_input_fn.repeat() - return result_input_fn - - return _input_fn - - -def _is_classification_head(head): - """Infers if the head is a classification head.""" - # Check using all classification heads defined in canned/head.py. However, it - # is not a complete list - it does not check for other classification heads - # not defined in the head library. - # pylint: disable=protected-access - return isinstance(head, - (head_lib._BinaryLogisticHeadWithSigmoidCrossEntropyLoss, - head_lib._MultiClassHeadWithSoftmaxCrossEntropyLoss)) - # pylint: enable=protected-access - - -class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase): # pylint: disable=protected-access - """An Estimator for Tensorflow Boosted Trees models.""" - - def __init__(self, - feature_columns, - n_batches_per_layer, - head, - model_dir=None, - weight_column=None, - n_trees=100, - max_depth=6, - learning_rate=0.1, - l1_regularization=0., - l2_regularization=0., - tree_complexity=0., - min_node_weight=0., - config=None, - center_bias=False, - pruning_mode='none'): - """Initializes a `BoostedTreesEstimator` instance. - - Args: - feature_columns: An iterable containing all the feature columns used by - the model. 
All items in the set should be instances of classes derived - from `FeatureColumn`. - n_batches_per_layer: the number of batches to collect statistics per - layer. - head: the `Head` instance defined for Estimator. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into an estimator - to continue training a previously saved model. - weight_column: A string or a `_NumericColumn` created by - `tf.feature_column.numeric_column` defining feature column representing - weights. It is used to downweight or boost examples during training. It - will be multiplied by the loss of the example. If it is a string, it is - used as a key to fetch weight tensor from the `features`. If it is a - `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, - then weight_column.normalizer_fn is applied on it to get weight tensor. - n_trees: number trees to be created. - max_depth: maximum depth of the tree to grow. - learning_rate: shrinkage parameter to be used when a tree added to the - model. - l1_regularization: regularization multiplier applied to the absolute - weights of the tree leafs. - l2_regularization: regularization multiplier applied to the square weights - of the tree leafs. - tree_complexity: regularization factor to penalize trees with more leaves. - min_node_weight: minimum hessian a node must have for a split to be - considered. The value will be compared with sum(leaf_hessian)/ - (batch_size * n_batches_per_layer). - config: `RunConfig` object to configure the runtime settings. - center_bias: Whether bias centering needs to occur. Bias centering refers - to the first node in the very first tree returning the prediction that - is aligned with the original labels distribution. For example, for - regression problems, the first node will return the mean of the labels. - For binary classification problems, it will return a logit for a prior - probability of label 1. 
- pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre- - pruning (do not split a node if not enough gain is observed) and post - pruning (build the tree up to a max depth and then prune branches with - negative gain). For pre and post pruning, you MUST provide - tree_complexity >0. - - Raises: - ValueError: when wrong arguments are given or unsupported functionalities - are requested. - """ - # HParams for the model. - # pylint: disable=protected-access - tree_hparams = canned_boosted_trees._TreeHParams( - n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity, min_node_weight, center_bias, pruning_mode) - - def _model_fn(features, labels, mode, config): - return canned_boosted_trees._bt_model_fn( - features, - labels, - mode, - head, - feature_columns, - tree_hparams, - n_batches_per_layer, - config=config) - - super(_BoostedTreesEstimator, self).__init__( - model_fn=_model_fn, - model_dir=model_dir, - config=config, - feature_columns=feature_columns, - head=head, - center_bias=center_bias, - is_classification=_is_classification_head(head)) - # pylint: enable=protected-access - - -def boosted_trees_classifier_train_in_memory( - train_input_fn, - feature_columns, - model_dir=None, - n_classes=canned_boosted_trees._HOLD_FOR_MULTI_CLASS_SUPPORT, - weight_column=None, - label_vocabulary=None, - n_trees=100, - max_depth=6, - learning_rate=0.1, - l1_regularization=0., - l2_regularization=0., - tree_complexity=0., - min_node_weight=0., - config=None, - train_hooks=None, - center_bias=False, - pruning_mode='none'): - """Trains a boosted tree classifier with in memory dataset. 
- - Example: - - ```python - bucketized_feature_1 = bucketized_column( - numeric_column('feature_1'), BUCKET_BOUNDARIES_1) - bucketized_feature_2 = bucketized_column( - numeric_column('feature_2'), BUCKET_BOUNDARIES_2) - - def train_input_fn(): - dataset = create-dataset-from-training-data - # This is tf.data.Dataset of a tuple of feature dict and label. - # e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}), - # Dataset.from_tensors(label_array))) - # The returned Dataset shouldn't be batched. - # If Dataset repeats, only the first repetition would be used for training. - return dataset - - classifier = boosted_trees_classifier_train_in_memory( - train_input_fn, - feature_columns=[bucketized_feature_1, bucketized_feature_2], - n_trees=100, - ... - ) - - def input_fn_eval(): - ... - return dataset - - metrics = classifier.evaluate(input_fn=input_fn_eval, steps=10) - ``` - - Args: - train_input_fn: the input function returns a dataset containing a single - epoch of *unbatched* features and labels. - feature_columns: An iterable containing all the feature columns used by - the model. All items in the set should be instances of classes derived - from `FeatureColumn`. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into an estimator - to continue training a previously saved model. - n_classes: number of label classes. Default is binary classification. - Multiclass support is not yet implemented. - weight_column: A string or a `_NumericColumn` created by - `tf.feature_column.numeric_column` defining feature column representing - weights. It is used to downweight or boost examples during training. It - will be multiplied by the loss of the example. If it is a string, it is - used as a key to fetch weight tensor from the `features`. 
If it is a - `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, - then weight_column.normalizer_fn is applied on it to get weight tensor. - label_vocabulary: A list of strings represents possible label values. If - given, labels must be string type and have any value in - `label_vocabulary`. If it is not given, that means labels are - already encoded as integer or float within [0, 1] for `n_classes=2` and - encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . - Also there will be errors if vocabulary is not provided and labels are - string. - n_trees: number trees to be created. - max_depth: maximum depth of the tree to grow. - learning_rate: shrinkage parameter to be used when a tree added to the - model. - l1_regularization: regularization multiplier applied to the absolute - weights of the tree leafs. - l2_regularization: regularization multiplier applied to the square weights - of the tree leafs. - tree_complexity: regularization factor to penalize trees with more leaves. - min_node_weight: minimum hessian a node must have for a split to be - considered. The value will be compared with sum(leaf_hessian)/ - (batch_size * n_batches_per_layer). - config: `RunConfig` object to configure the runtime settings. - train_hooks: a list of Hook instances to be passed to estimator.train() - center_bias: Whether bias centering needs to occur. Bias centering refers - to the first node in the very first tree returning the prediction that - is aligned with the original labels distribution. For example, for - regression problems, the first node will return the mean of the labels. - For binary classification problems, it will return a logit for a prior - probability of label 1. - pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre- - pruning (do not split a node if not enough gain is observed) and post - pruning (build the tree up to a max depth and then prune branches with - negative gain). 
For pre and post pruning, you MUST provide - tree_complexity >0. - - Returns: - a `BoostedTreesClassifier` instance created with the given arguments and - trained with the data loaded up on memory from the input_fn. - - Raises: - ValueError: when wrong arguments are given or unsupported functionalities - are requested. - """ - # pylint: disable=protected-access - # TODO(nponomareva): Support multi-class cases. - if n_classes == canned_boosted_trees._HOLD_FOR_MULTI_CLASS_SUPPORT: - n_classes = 2 - head, closed_form = ( - canned_boosted_trees._create_classification_head_and_closed_form( - n_classes, weight_column, label_vocabulary=label_vocabulary)) - - # HParams for the model. - tree_hparams = canned_boosted_trees._TreeHParams( - n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity, min_node_weight, center_bias, pruning_mode) - - def _model_fn(features, labels, mode, config): - return canned_boosted_trees._bt_model_fn( - features, - labels, - mode, - head, - feature_columns, - tree_hparams, - n_batches_per_layer=1, - config=config, - closed_form_grad_and_hess_fn=closed_form, - train_in_memory=True) - - in_memory_classifier = estimator.Estimator( - model_fn=_model_fn, model_dir=model_dir, config=config) - - in_memory_classifier.train( - input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn), - hooks=train_hooks) - - return in_memory_classifier - # pylint: enable=protected-access - - -def boosted_trees_regressor_train_in_memory( - train_input_fn, - feature_columns, - model_dir=None, - label_dimension=canned_boosted_trees._HOLD_FOR_MULTI_DIM_SUPPORT, - weight_column=None, - n_trees=100, - max_depth=6, - learning_rate=0.1, - l1_regularization=0., - l2_regularization=0., - tree_complexity=0., - min_node_weight=0., - config=None, - train_hooks=None, - center_bias=False, - pruning_mode='none'): - """Trains a boosted tree regressor with in memory dataset. 
- - Example: - - ```python - bucketized_feature_1 = bucketized_column( - numeric_column('feature_1'), BUCKET_BOUNDARIES_1) - bucketized_feature_2 = bucketized_column( - numeric_column('feature_2'), BUCKET_BOUNDARIES_2) - - def train_input_fn(): - dataset = create-dataset-from-training-data - # This is tf.data.Dataset of a tuple of feature dict and label. - # e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}), - # Dataset.from_tensors(label_array))) - # The returned Dataset shouldn't be batched. - # If Dataset repeats, only the first repetition would be used for training. - return dataset - - regressor = boosted_trees_regressor_train_in_memory( - train_input_fn, - feature_columns=[bucketized_feature_1, bucketized_feature_2], - n_trees=100, - ... - ) - - def input_fn_eval(): - ... - return dataset - - metrics = regressor.evaluate(input_fn=input_fn_eval, steps=10) - ``` - - Args: - train_input_fn: the input function returns a dataset containing a single - epoch of *unbatched* features and labels. - feature_columns: An iterable containing all the feature columns used by - the model. All items in the set should be instances of classes derived - from `FeatureColumn`. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into an estimator - to continue training a previously saved model. - label_dimension: Number of regression targets per example. - Multi-dimensional support is not yet implemented. - weight_column: A string or a `_NumericColumn` created by - `tf.feature_column.numeric_column` defining feature column representing - weights. It is used to downweight or boost examples during training. It - will be multiplied by the loss of the example. If it is a string, it is - used as a key to fetch weight tensor from the `features`. If it is a - `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, - then weight_column.normalizer_fn is applied on it to get weight tensor. 
- n_trees: number trees to be created. - max_depth: maximum depth of the tree to grow. - learning_rate: shrinkage parameter to be used when a tree added to the - model. - l1_regularization: regularization multiplier applied to the absolute - weights of the tree leafs. - l2_regularization: regularization multiplier applied to the square weights - of the tree leafs. - tree_complexity: regularization factor to penalize trees with more leaves. - min_node_weight: minimum hessian a node must have for a split to be - considered. The value will be compared with sum(leaf_hessian)/ - (batch_size * n_batches_per_layer). - config: `RunConfig` object to configure the runtime settings. - train_hooks: a list of Hook instances to be passed to estimator.train(). - center_bias: Whether bias centering needs to occur. Bias centering refers - to the first node in the very first tree returning the prediction that - is aligned with the original labels distribution. For example, for - regression problems, the first node will return the mean of the labels. - For binary classification problems, it will return a logit for a prior - probability of label 1. - pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre- - pruning (do not split a node if not enough gain is observed) and post - pruning (build the tree up to a max depth and then prune branches with - negative gain). For pre and post pruning, you MUST provide - tree_complexity >0. - - Returns: - a `BoostedTreesClassifier` instance created with the given arguments and - trained with the data loaded up on memory from the input_fn. - - Raises: - ValueError: when wrong arguments are given or unsupported functionalities - are requested. - """ - # pylint: disable=protected-access - # TODO(nponomareva): Extend it to multi-dimension cases. 
- if label_dimension == canned_boosted_trees._HOLD_FOR_MULTI_DIM_SUPPORT: - label_dimension = 1 - head = canned_boosted_trees._create_regression_head(label_dimension, - weight_column) - - # HParams for the model. - tree_hparams = canned_boosted_trees._TreeHParams( - n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity, min_node_weight, center_bias, pruning_mode) - - def _model_fn(features, labels, mode, config): - return canned_boosted_trees._bt_model_fn( - features, - labels, - mode, - head, - feature_columns, - tree_hparams, - n_batches_per_layer=1, - config=config, - train_in_memory=True) - - in_memory_regressor = estimator.Estimator( - model_fn=_model_fn, model_dir=model_dir, config=config) +from tensorflow_estimator.contrib.estimator.python.estimator import boosted_trees - in_memory_regressor.train( - input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn), - hooks=train_hooks) +# Include attrs that start with single underscore. +boosted_trees.__all__ = [ + s for s in dir(boosted_trees) if not s.startswith('__') +] - return in_memory_regressor - # pylint: enable=protected-access +# pylint: disable=g-import-not-at-top +from tensorflow_estimator.contrib.estimator.python.estimator.boosted_trees import * diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py deleted file mode 100644 index e23d9c0fc4..0000000000 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py +++ /dev/null @@ -1,438 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests boosted_trees estimators.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.estimator.python.estimator import boosted_trees -from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees -from tensorflow.python.estimator.inputs import numpy_io -from tensorflow.python.feature_column import feature_column -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util -from tensorflow.python.platform import googletest -from tensorflow.python.training import checkpoint_utils - -NUM_FEATURES = 3 - -BUCKET_BOUNDARIES = [-2., .5, 12.] # Boundaries for all the features. 
-INPUT_FEATURES = np.array( - [ - [12.5, 1.0, -2.001, -2.0001, -1.999], # feature_0 quantized:[3,2,0,0,1] - [2.0, -3.0, 0.5, 0.0, 0.4995], # feature_1 quantized:[2,0,2,1,1] - [3.0, 20.0, 50.0, -100.0, 102.75], # feature_2 quantized:[2,3,3,0,3] - ], - dtype=np.float32) -CLASSIFICATION_LABELS = [[0.], [1.], [1.], [0.], [0.]] -REGRESSION_LABELS = [[1.5], [0.3], [0.2], [2.], [5.]] -FEATURES_DICT = {'f_%d' % i: INPUT_FEATURES[i] for i in range(NUM_FEATURES)} - - -def _make_train_input_fn(is_classification): - """Makes train input_fn for classification/regression.""" - - def _input_fn(): - features_dict = dict(FEATURES_DICT) - labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS - return features_dict, labels - - return _input_fn - - -def _make_train_input_fn_dataset(is_classification): - """Makes input_fn using Dataset.""" - - def _input_fn(): - features_dict = dict(FEATURES_DICT) - labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS - ds = dataset_ops.Dataset.zip( - (dataset_ops.Dataset.from_tensors(features_dict), - dataset_ops.Dataset.from_tensors(labels) - )) - return ds - - return _input_fn - - -class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): - - def setUp(self): - self._head = canned_boosted_trees._create_regression_head(label_dimension=1) - self._feature_columns = { - feature_column.bucketized_column( - feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), - BUCKET_BOUNDARIES) - for i in range(NUM_FEATURES) - } - - def _assert_checkpoint(self, model_dir, global_step, finalized_trees, - attempted_layers): - reader = checkpoint_utils.load_checkpoint(model_dir) - self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP)) - serialized = reader.get_tensor('boosted_trees:0_serialized') - ensemble_proto = boosted_trees_pb2.TreeEnsemble() - ensemble_proto.ParseFromString(serialized) - self.assertEqual( - finalized_trees, - sum([1 for t in ensemble_proto.tree_metadata if 
t.is_finalized])) - self.assertEqual(attempted_layers, - ensemble_proto.growing_metadata.num_layers_attempted) - - def testTrainAndEvaluateEstimator(self): - input_fn = _make_train_input_fn(is_classification=False) - - est = boosted_trees._BoostedTreesEstimator( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - head=self._head, - max_depth=5) - - # It will stop after 10 steps because of the max depth and num trees. - num_steps = 100 - # Train for a few steps, and validate final checkpoint. - est.train(input_fn, steps=num_steps) - self._assert_checkpoint( - est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10) - eval_res = est.evaluate(input_fn=input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 1.008551) - - def testTrainAndEvaluateEstimatorWithCenterBias(self): - input_fn = _make_train_input_fn(is_classification=False) - - est = boosted_trees._BoostedTreesEstimator( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - head=self._head, - max_depth=5, - center_bias=True) - - # It will stop after 11 steps because of the max depth and num trees. - num_steps = 100 - # Train for a few steps, and validate final checkpoint. - est.train(input_fn, steps=num_steps) - # 10 steps for training and 2 step for bias centering. - self._assert_checkpoint( - est.model_dir, global_step=12, finalized_trees=2, attempted_layers=10) - eval_res = est.evaluate(input_fn=input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 0.614642) - - def testTrainAndEvaluateEstimatorWithPrePruning(self): - input_fn = _make_train_input_fn(is_classification=False) - - est = boosted_trees._BoostedTreesEstimator( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - head=self._head, - max_depth=5, - tree_complexity=0.001, - pruning_mode='pre') - - num_steps = 100 - # Train for a few steps, and validate final checkpoint. 
- est.train(input_fn, steps=num_steps) - # We stop actually after 2*depth*n_trees steps (via a hook) because we still - # could not grow 2 trees of depth 5 (due to pre-pruning). - self._assert_checkpoint( - est.model_dir, global_step=21, finalized_trees=0, attempted_layers=21) - eval_res = est.evaluate(input_fn=input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 3.83943) - - def testTrainAndEvaluateEstimatorWithPostPruning(self): - input_fn = _make_train_input_fn(is_classification=False) - - est = boosted_trees._BoostedTreesEstimator( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - head=self._head, - max_depth=5, - tree_complexity=0.001, - pruning_mode='post') - - # It will stop after 10 steps because of the max depth and num trees. - num_steps = 100 - # Train for a few steps, and validate final checkpoint. - est.train(input_fn, steps=num_steps) - self._assert_checkpoint( - est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10) - eval_res = est.evaluate(input_fn=input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 2.37652) - - def testInferEstimator(self): - train_input_fn = _make_train_input_fn(is_classification=False) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees._BoostedTreesEstimator( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=1, - max_depth=5, - head=self._head) - - # It will stop after 5 steps because of the max depth and num trees. - num_steps = 100 - # Train for a few steps, and validate final checkpoint. - est.train(train_input_fn, steps=num_steps) - self._assert_checkpoint( - est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - # Validate predictions. 
- predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertAllClose( - [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], - [pred['predictions'] for pred in predictions]) - - def testInferEstimatorWithCenterBias(self): - train_input_fn = _make_train_input_fn(is_classification=False) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees._BoostedTreesEstimator( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=1, - max_depth=5, - center_bias=True, - head=self._head) - - # It will stop after 6 steps because of the max depth and num trees (5 for - # training and 2 for bias centering). - num_steps = 100 - # Train for a few steps, and validate final checkpoint. - est.train(train_input_fn, steps=num_steps) - self._assert_checkpoint( - est.model_dir, global_step=7, finalized_trees=1, attempted_layers=5) - # Validate predictions. - predictions = list(est.predict(input_fn=predict_input_fn)) - - self.assertAllClose( - [[1.634501], [1.325703], [1.187431], [2.019683], [2.832683]], - [pred['predictions'] for pred in predictions]) - - def testBinaryClassifierTrainInMemoryAndEvalAndInfer(self): - train_input_fn = _make_train_input_fn(is_classification=True) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees.boosted_trees_classifier_train_in_memory( - train_input_fn=train_input_fn, feature_columns=self._feature_columns, - n_trees=1, max_depth=5) - # It will stop after 5 steps because of the max depth and num trees. - self._assert_checkpoint( - est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - - # Check evaluate and predict. - eval_res = est.evaluate(input_fn=train_input_fn, steps=1) - self.assertAllClose(eval_res['accuracy'], 1.0) - # Validate predictions. 
- predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertAllClose([[0], [1], [1], [0], [0]], - [pred['class_ids'] for pred in predictions]) - - def testBinaryClassifierTrainInMemoryAndEvalAndInferWithCenterBias(self): - train_input_fn = _make_train_input_fn(is_classification=True) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees.boosted_trees_classifier_train_in_memory( - train_input_fn=train_input_fn, - feature_columns=self._feature_columns, - n_trees=1, - max_depth=5, - center_bias=True) - # It will stop after 5 steps + 3 for bias, because of the max depth and num - # trees. - self._assert_checkpoint( - est.model_dir, global_step=8, finalized_trees=1, attempted_layers=5) - - # Check evaluate and predict. - eval_res = est.evaluate(input_fn=train_input_fn, steps=1) - self.assertAllClose(eval_res['accuracy'], 1.0) - # Validate predictions. - predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertAllClose([[0], [1], [1], [0], [0]], - [pred['class_ids'] for pred in predictions]) - - def testBinaryClassifierTrainInMemoryAndEvalAndInferWithPrePruning(self): - train_input_fn = _make_train_input_fn(is_classification=True) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees.boosted_trees_classifier_train_in_memory( - train_input_fn=train_input_fn, - feature_columns=self._feature_columns, - n_trees=1, - max_depth=5, - pruning_mode='pre', - tree_complexity=0.01) - # We stop actually after 2*depth*n_trees steps (via a hook) because we still - # could not grow 1 trees of depth 5 (due to pre-pruning). - self._assert_checkpoint( - est.model_dir, global_step=11, finalized_trees=0, attempted_layers=11) - - # Check evaluate and predict. - eval_res = est.evaluate(input_fn=train_input_fn, steps=1) - self.assertAllClose(eval_res['accuracy'], 1.0) - # Validate predictions. 
- predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertAllClose([[0], [1], [1], [0], [0]], - [pred['class_ids'] for pred in predictions]) - - def testBinaryClassifierTrainInMemoryWithDataset(self): - train_input_fn = _make_train_input_fn_dataset(is_classification=True) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees.boosted_trees_classifier_train_in_memory( - train_input_fn=train_input_fn, - feature_columns=self._feature_columns, - n_trees=1, - max_depth=5) - # It will stop after 5 steps because of the max depth and num trees. - self._assert_checkpoint( - est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - - # Check evaluate and predict. - eval_res = est.evaluate(input_fn=train_input_fn, steps=1) - self.assertAllClose(eval_res['accuracy'], 1.0) - predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertAllClose([[0], [1], [1], [0], [0]], - [pred['class_ids'] for pred in predictions]) - - def testRegressorTrainInMemoryAndEvalAndInfer(self): - train_input_fn = _make_train_input_fn(is_classification=False) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees.boosted_trees_regressor_train_in_memory( - train_input_fn=train_input_fn, feature_columns=self._feature_columns, - n_trees=1, max_depth=5) - # It will stop after 5 steps because of the max depth and num trees. - self._assert_checkpoint( - est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - - # Check evaluate and predict. 
- eval_res = est.evaluate(input_fn=train_input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 2.478283) - predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertAllClose( - [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], - [pred['predictions'] for pred in predictions]) - - def testRegressorTrainInMemoryWithDataset(self): - train_input_fn = _make_train_input_fn_dataset(is_classification=False) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees.boosted_trees_regressor_train_in_memory( - train_input_fn=train_input_fn, feature_columns=self._feature_columns, - n_trees=1, max_depth=5) - # It will stop after 5 steps because of the max depth and num trees. - self._assert_checkpoint( - est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - # Check evaluate and predict. - eval_res = est.evaluate(input_fn=train_input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 2.478283) - predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertAllClose( - [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], - [pred['predictions'] for pred in predictions]) - - -class BoostedTreesDebugOutputTest(test_util.TensorFlowTestCase): - - def setUp(self): - self._head = canned_boosted_trees._create_regression_head(label_dimension=1) - self._feature_columns = { - feature_column.bucketized_column( - feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), - BUCKET_BOUNDARIES) for i in range(NUM_FEATURES) - } - - def testContribEstimatorThatDFCIsInPredictions(self): - # pylint:disable=protected-access - head = canned_boosted_trees._create_regression_head(label_dimension=1) - train_input_fn = _make_train_input_fn(is_classification=False) - predict_input_fn = numpy_io.numpy_input_fn( - x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - - est = boosted_trees._BoostedTreesEstimator( - 
feature_columns=self._feature_columns, - n_batches_per_layer=1, - head=head, - n_trees=1, - max_depth=5, - center_bias=True) - # pylint:enable=protected-access - - num_steps = 100 - # Train for a few steps. Validate debug outputs in prediction dicts. - est.train(train_input_fn, steps=num_steps) - debug_predictions = est.experimental_predict_with_explanations( - predict_input_fn) - biases, dfcs = zip(*[(pred['bias'], pred['dfc']) - for pred in debug_predictions]) - self.assertAllClose([1.8] * 5, biases) - self.assertAllClose(({ - 0: -0.070499420166015625, - 1: -0.095000028610229492, - 2: 0.0 - }, { - 0: -0.53763031959533691, - 1: 0.063333392143249512, - 2: 0.0 - }, { - 0: -0.51756942272186279, - 1: -0.095000028610229492, - 2: 0.0 - }, { - 0: 0.1563495397567749, - 1: 0.063333392143249512, - 2: 0.0 - }, { - 0: 0.96934974193572998, - 1: 0.063333392143249512, - 2: 0.0 - }), dfcs) - - # Assert sum(dfcs) + bias == predictions. - expected_predictions = [[1.6345005], [1.32570302], [1.1874305], - [2.01968288], [2.83268309]] - predictions = [ - [sum(dfc.values()) + bias] for (dfc, bias) in zip(dfcs, biases) - ] - self.assertAllClose(expected_predictions, predictions) - - # Test when user doesn't include bias or dfc in predict_keys. - debug_predictions = est.experimental_predict_with_explanations( - predict_input_fn, predict_keys=['predictions']) - for prediction_dict in debug_predictions: - self.assertTrue('bias' in prediction_dict) - self.assertTrue('dfc' in prediction_dict) - self.assertTrue('predictions' in prediction_dict) - self.assertEqual(len(prediction_dict), 3) - - -if __name__ == '__main__': - googletest.main() diff --git a/tensorflow/contrib/estimator/python/estimator/dnn.py b/tensorflow/contrib/estimator/python/estimator/dnn.py index 9efa8f474d..6b260de9e3 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,153 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Deep Neural Network estimators.""" +"""dnn python module. + +Importing from tensorflow.python.estimator +is unsupported and will soon break! +""" +# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.estimator import estimator -from tensorflow.python.estimator.canned import dnn as dnn_lib -from tensorflow.python.ops import nn - - -class DNNEstimator(estimator.Estimator): - """An estimator for TensorFlow DNN models with user-specified head. - - Example: - - ```python - sparse_feature_a = sparse_column_with_hash_bucket(...) - sparse_feature_b = sparse_column_with_hash_bucket(...) - - sparse_feature_a_emb = embedding_column(sparse_id_column=sparse_feature_a, - ...) - sparse_feature_b_emb = embedding_column(sparse_id_column=sparse_feature_b, - ...) - - estimator = DNNEstimator( - head=tf.contrib.estimator.multi_label_head(n_classes=3), - feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb], - hidden_units=[1024, 512, 256]) - - # Or estimator using the ProximalAdagradOptimizer optimizer with - # regularization. - estimator = DNNEstimator( - head=tf.contrib.estimator.multi_label_head(n_classes=3), - feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb], - hidden_units=[1024, 512, 256], - optimizer=tf.train.ProximalAdagradOptimizer( - learning_rate=0.1, - l1_regularization_strength=0.001 - )) - - # Or estimator using an optimizer with a learning rate decay. 
- estimator = DNNEstimator( - head=tf.contrib.estimator.multi_label_head(n_classes=3), - feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb], - hidden_units=[1024, 512, 256], - optimizer=lambda: tf.AdamOptimizer( - learning_rate=tf.exponential_decay( - learning_rate=0.1, - global_step=tf.get_global_step(), - decay_steps=10000, - decay_rate=0.96)) - - # Or estimator with warm-starting from a previous checkpoint. - estimator = DNNEstimator( - head=tf.contrib.estimator.multi_label_head(n_classes=3), - feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb], - hidden_units=[1024, 512, 256], - warm_start_from="/path/to/checkpoint/dir") - - # Input builders - def input_fn_train: # returns x, y - pass - estimator.train(input_fn=input_fn_train, steps=100) - - def input_fn_eval: # returns x, y - pass - metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) - def input_fn_predict: # returns x, None - pass - predictions = estimator.predict(input_fn=input_fn_predict) - ``` - - Input of `train` and `evaluate` should have following features, - otherwise there will be a `KeyError`: - - * if `weight_column` is not `None`, a feature with - `key=weight_column` whose value is a `Tensor`. - * for each `column` in `feature_columns`: - - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` - whose `value` is a `SparseTensor`. - - if `column` is a `_WeightedCategoricalColumn`, two features: the first - with `key` the id column name, the second with `key` the weight column - name. Both features' `value` must be a `SparseTensor`. - - if `column` is a `_DenseColumn`, a feature with `key=column.name` - whose `value` is a `Tensor`. - - Loss and predicted output are determined by the specified head. 
- """ +from tensorflow_estimator.contrib.estimator.python.estimator import dnn - def __init__(self, - head, - hidden_units, - feature_columns, - model_dir=None, - optimizer='Adagrad', - activation_fn=nn.relu, - dropout=None, - input_layer_partitioner=None, - config=None, - warm_start_from=None, - batch_norm=False): - """Initializes a `DNNEstimator` instance. +# Include attrs that start with single underscore. +dnn.__all__ = [s for s in dir(dnn) if not s.startswith('__')] - Args: - head: A `_Head` instance constructed with a method such as - `tf.contrib.estimator.multi_label_head`. - hidden_units: Iterable of number hidden units per layer. All layers are - fully connected. Ex. `[64, 32]` means first layer has 64 nodes and - second one has 32. - feature_columns: An iterable containing all the feature columns used by - the model. All items in the set should be instances of classes derived - from `_FeatureColumn`. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator to - continue training a previously saved model. - optimizer: An instance of `tf.Optimizer` used to train the model. Can also - be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or - callable. Defaults to Adagrad optimizer. - activation_fn: Activation function applied to each layer. If `None`, will - use `tf.nn.relu`. - dropout: When not `None`, the probability we will drop out a given - coordinate. - input_layer_partitioner: Optional. Partitioner for input layer. Defaults - to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. - config: `RunConfig` object to configure the runtime settings. - warm_start_from: A string filepath to a checkpoint to warm-start from, or - a `WarmStartSettings` object to fully configure warm-starting. 
If the - string filepath is provided instead of a `WarmStartSettings`, then all - weights are warm-started, and it is assumed that vocabularies and Tensor - names are unchanged. - batch_norm: Whether to use batch normalization after each hidden layer. - """ - def _model_fn(features, labels, mode, config): - return dnn_lib._dnn_model_fn( # pylint: disable=protected-access - features=features, - labels=labels, - mode=mode, - head=head, - hidden_units=hidden_units, - feature_columns=tuple(feature_columns or []), - optimizer=optimizer, - activation_fn=activation_fn, - dropout=dropout, - input_layer_partitioner=input_layer_partitioner, - config=config, - batch_norm=batch_norm) - super(DNNEstimator, self).__init__( - model_fn=_model_fn, model_dir=model_dir, config=config, - warm_start_from=warm_start_from) +# pylint: disable=g-import-not-at-top +from tensorflow_estimator.contrib.estimator.python.estimator.dnn import * diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py index 4e7965ef26..24655c9964 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,171 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow estimator for Linear and DNN joined training models.""" +"""dnn_linear_combined python module. + +Importing from tensorflow.python.estimator +is unsupported and will soon break! 
+""" +# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.estimator import estimator -from tensorflow.python.estimator.canned import dnn_linear_combined as dnn_linear_combined_lib -from tensorflow.python.ops import nn - - -class DNNLinearCombinedEstimator(estimator.Estimator): - """An estimator for TensorFlow Linear and DNN joined models with custom head. - - Note: This estimator is also known as wide-n-deep. - - Example: - - ```python - numeric_feature = numeric_column(...) - categorical_column_a = categorical_column_with_hash_bucket(...) - categorical_column_b = categorical_column_with_hash_bucket(...) - - categorical_feature_a_x_categorical_feature_b = crossed_column(...) - categorical_feature_a_emb = embedding_column( - categorical_column=categorical_feature_a, ...) - categorical_feature_b_emb = embedding_column( - categorical_column=categorical_feature_b, ...) - - estimator = DNNLinearCombinedEstimator( - head=tf.contrib.estimator.multi_label_head(n_classes=3), - # wide settings - linear_feature_columns=[categorical_feature_a_x_categorical_feature_b], - linear_optimizer=tf.train.FtrlOptimizer(...), - # deep settings - dnn_feature_columns=[ - categorical_feature_a_emb, categorical_feature_b_emb, - numeric_feature], - dnn_hidden_units=[1000, 500, 100], - dnn_optimizer=tf.train.ProximalAdagradOptimizer(...)) - - # To apply L1 and L2 regularization, you can set dnn_optimizer to: - tf.train.ProximalAdagradOptimizer( - learning_rate=0.1, - l1_regularization_strength=0.001, - l2_regularization_strength=0.001) - # To apply learning rate decay, you can set dnn_optimizer to a callable: - lambda: tf.AdamOptimizer( - learning_rate=tf.exponential_decay( - learning_rate=0.1, - global_step=tf.get_global_step(), - decay_steps=10000, - decay_rate=0.96) - # It is the same for linear_optimizer. 
- - # Input builders - def input_fn_train: # returns x, y - pass - estimator.train(input_fn=input_fn_train, steps=100) - - def input_fn_eval: # returns x, y - pass - metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) - def input_fn_predict: # returns x, None - pass - predictions = estimator.predict(input_fn=input_fn_predict) - ``` - - Input of `train` and `evaluate` should have following features, - otherwise there will be a `KeyError`: - - * for each `column` in `dnn_feature_columns` + `linear_feature_columns`: - - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` - whose `value` is a `SparseTensor`. - - if `column` is a `_WeightedCategoricalColumn`, two features: the first - with `key` the id column name, the second with `key` the weight column - name. Both features' `value` must be a `SparseTensor`. - - if `column` is a `_DenseColumn`, a feature with `key=column.name` - whose `value` is a `Tensor`. - - Loss is calculated by using mean squared error. - - @compatibility(eager) - Estimators are not compatible with eager execution. - @end_compatibility - """ - - def __init__(self, - head, - model_dir=None, - linear_feature_columns=None, - linear_optimizer='Ftrl', - dnn_feature_columns=None, - dnn_optimizer='Adagrad', - dnn_hidden_units=None, - dnn_activation_fn=nn.relu, - dnn_dropout=None, - input_layer_partitioner=None, - config=None, - linear_sparse_combiner='sum'): - """Initializes a DNNLinearCombinedEstimator instance. - - Args: - head: A `_Head` instance constructed with a method such as - `tf.contrib.estimator.multi_label_head`. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into an estimator - to continue training a previously saved model. - linear_feature_columns: An iterable containing all the feature columns - used by linear part of the model. All items in the set must be - instances of classes derived from `FeatureColumn`. 
- linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to - the linear part of the model. Can also be a string (one of 'Adagrad', - 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL - optimizer. - dnn_feature_columns: An iterable containing all the feature columns used - by deep part of the model. All items in the set must be instances of - classes derived from `FeatureColumn`. - dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to - the deep part of the model. Can also be a string (one of 'Adagrad', - 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad - optimizer. - dnn_hidden_units: List of hidden units per layer. All layers are fully - connected. - dnn_activation_fn: Activation function applied to each layer. If None, - will use `tf.nn.relu`. - dnn_dropout: When not None, the probability we will drop out - a given coordinate. - input_layer_partitioner: Partitioner for input layer. Defaults to - `min_max_variable_partitioner` with `min_slice_size` 64 << 20. - config: RunConfig object to configure the runtime settings. - linear_sparse_combiner: A string specifying how to reduce the linear model - if a categorical column is multivalent. One of "mean", "sqrtn", and - "sum" -- these are effectively different ways to do example-level - normalization, which can be useful for bag-of-words features. For more - details, see `tf.feature_column.linear_model`. - - Raises: - ValueError: If both linear_feature_columns and dnn_features_columns are - empty at the same time. 
- """ - linear_feature_columns = linear_feature_columns or [] - dnn_feature_columns = dnn_feature_columns or [] - self._feature_columns = ( - list(linear_feature_columns) + list(dnn_feature_columns)) - if not self._feature_columns: - raise ValueError('Either linear_feature_columns or dnn_feature_columns ' - 'must be defined.') +from tensorflow_estimator.contrib.estimator.python.estimator import dnn_linear_combined - def _model_fn(features, labels, mode, config): - return dnn_linear_combined_lib._dnn_linear_combined_model_fn( # pylint: disable=protected-access - features=features, - labels=labels, - mode=mode, - head=head, - linear_feature_columns=linear_feature_columns, - linear_optimizer=linear_optimizer, - dnn_feature_columns=dnn_feature_columns, - dnn_optimizer=dnn_optimizer, - dnn_hidden_units=dnn_hidden_units, - dnn_activation_fn=dnn_activation_fn, - dnn_dropout=dnn_dropout, - input_layer_partitioner=input_layer_partitioner, - config=config, - linear_sparse_combiner=linear_sparse_combiner) +# Include attrs that start with single underscore. +dnn_linear_combined.__all__ = [ + s for s in dir(dnn_linear_combined) if not s.startswith('__') +] - super(DNNLinearCombinedEstimator, self).__init__( - model_fn=_model_fn, model_dir=model_dir, config=config) +# pylint: disable=g-import-not-at-top +from tensorflow_estimator.contrib.estimator.python.estimator.dnn_linear_combined import * diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py deleted file mode 100644 index 51b9ce7005..0000000000 --- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for dnn_linear_combined.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import shutil -import tempfile - -import numpy as np -import six - -from tensorflow.contrib.estimator.python.estimator import dnn_linear_combined -from tensorflow.contrib.estimator.python.estimator import head as head_lib -from tensorflow.python.estimator.canned import dnn_testing_utils -from tensorflow.python.estimator.canned import linear_testing_utils -from tensorflow.python.estimator.canned import prediction_keys -from tensorflow.python.estimator.export import export -from tensorflow.python.estimator.inputs import numpy_io -from tensorflow.python.feature_column import feature_column -from tensorflow.python.framework import ops -from tensorflow.python.ops import nn -from tensorflow.python.ops.losses import losses -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow.python.summary.writer import writer_cache - - -def _dnn_only_estimator_fn( - hidden_units, - feature_columns, - model_dir=None, - label_dimension=1, - weight_column=None, - optimizer='Adagrad', - activation_fn=nn.relu, - dropout=None, - input_layer_partitioner=None, - config=None): - return dnn_linear_combined.DNNLinearCombinedEstimator( - head=head_lib.regression_head( - weight_column=weight_column, label_dimension=label_dimension, - # Tests in core (from which this test inherits) test the sum loss. 
- loss_reduction=losses.Reduction.SUM), - model_dir=model_dir, - dnn_feature_columns=feature_columns, - dnn_optimizer=optimizer, - dnn_hidden_units=hidden_units, - dnn_activation_fn=activation_fn, - dnn_dropout=dropout, - input_layer_partitioner=input_layer_partitioner, - config=config) - - -class DNNOnlyEstimatorEvaluateTest( - dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__( - self, _dnn_only_estimator_fn) - - -class DNNOnlyEstimatorPredictTest( - dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - dnn_testing_utils.BaseDNNRegressorPredictTest.__init__( - self, _dnn_only_estimator_fn) - - -class DNNOnlyEstimatorTrainTest( - dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - dnn_testing_utils.BaseDNNRegressorTrainTest.__init__( - self, _dnn_only_estimator_fn) - - -def _linear_only_estimator_fn( - feature_columns, - model_dir=None, - label_dimension=1, - weight_column=None, - optimizer='Ftrl', - config=None, - partitioner=None, - sparse_combiner='sum'): - return dnn_linear_combined.DNNLinearCombinedEstimator( - head=head_lib.regression_head( - weight_column=weight_column, label_dimension=label_dimension, - # Tests in core (from which this test inherits) test the sum loss. 
- loss_reduction=losses.Reduction.SUM), - model_dir=model_dir, - linear_feature_columns=feature_columns, - linear_optimizer=optimizer, - input_layer_partitioner=partitioner, - config=config, - linear_sparse_combiner=sparse_combiner) - - -class LinearOnlyEstimatorEvaluateTest( - linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__( - self, _linear_only_estimator_fn) - - -class LinearOnlyEstimatorPredictTest( - linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - linear_testing_utils.BaseLinearRegressorPredictTest.__init__( - self, _linear_only_estimator_fn) - - -class LinearOnlyEstimatorTrainTest( - linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - linear_testing_utils.BaseLinearRegressorTrainingTest.__init__( - self, _linear_only_estimator_fn) - - -class DNNLinearCombinedEstimatorIntegrationTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def _test_complete_flow( - self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, - label_dimension, batch_size): - linear_feature_columns = [ - feature_column.numeric_column('x', shape=(input_dimension,))] - dnn_feature_columns = [ - feature_column.numeric_column('x', shape=(input_dimension,))] - feature_columns = linear_feature_columns + dnn_feature_columns - est = dnn_linear_combined.DNNLinearCombinedEstimator( - head=head_lib.regression_head(label_dimension=label_dimension), - 
linear_feature_columns=linear_feature_columns, - dnn_feature_columns=dnn_feature_columns, - dnn_hidden_units=(2, 2), - model_dir=self._model_dir) - - # TRAIN - num_steps = 10 - est.train(train_input_fn, steps=num_steps) - - # EVALUTE - scores = est.evaluate(eval_input_fn) - self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) - self.assertIn('loss', six.iterkeys(scores)) - - # PREDICT - predictions = np.array([ - x[prediction_keys.PredictionKeys.PREDICTIONS] - for x in est.predict(predict_input_fn) - ]) - self.assertAllEqual((batch_size, label_dimension), predictions.shape) - - # EXPORT - feature_spec = feature_column.make_parse_example_spec(feature_columns) - serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( - feature_spec) - export_dir = est.export_savedmodel(tempfile.mkdtemp(), - serving_input_receiver_fn) - self.assertTrue(gfile.Exists(export_dir)) - - def test_numpy_input_fn(self): - """Tests complete flow with numpy_input_fn.""" - label_dimension = 2 - batch_size = 10 - data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) - data = data.reshape(batch_size, label_dimension) - # learn y = x - train_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=data, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - eval_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=data, - batch_size=batch_size, - shuffle=False) - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - batch_size=batch_size, - shuffle=False) - - self._test_complete_flow( - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - predict_input_fn=predict_input_fn, - input_dimension=label_dimension, - label_dimension=label_dimension, - batch_size=batch_size) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_test.py deleted file mode 100644 index 050b0428bf..0000000000 --- 
a/tensorflow/contrib/estimator/python/estimator/dnn_test.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for dnn.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import shutil -import tempfile - -import numpy as np -import six - -from tensorflow.contrib.estimator.python.estimator import dnn -from tensorflow.contrib.estimator.python.estimator import head as head_lib -from tensorflow.python.estimator.canned import dnn_testing_utils -from tensorflow.python.estimator.canned import prediction_keys -from tensorflow.python.estimator.export import export -from tensorflow.python.estimator.inputs import numpy_io -from tensorflow.python.feature_column import feature_column -from tensorflow.python.framework import ops -from tensorflow.python.ops.losses import losses -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow.python.summary.writer import writer_cache - - -def _dnn_estimator_fn(weight_column=None, label_dimension=1, *args, **kwargs): # pylint: disable=keyword-arg-before-vararg - """Returns a DNNEstimator that uses regression_head.""" - return dnn.DNNEstimator( - head=head_lib.regression_head( - weight_column=weight_column, label_dimension=label_dimension, - # 
Tests in core (from which this test inherits) test the sum loss. - loss_reduction=losses.Reduction.SUM), - *args, **kwargs) - - -def _dnn_estimator_classifier_fn(n_classes=3, *args, **kwargs): # pylint: disable=keyword-arg-before-vararg - """Returns a DNNEstimator that uses multi_class_head.""" - return dnn.DNNEstimator(head=head_lib.multi_class_head(n_classes=n_classes), - *args, **kwargs) - - -class DNNEstimatorEvaluateTest( - dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__( - self, _dnn_estimator_fn) - - -class DNNEstimatorPredictTest( - dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - dnn_testing_utils.BaseDNNRegressorPredictTest.__init__( - self, _dnn_estimator_fn) - - -class DNNEstimatorTrainTest( - dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - dnn_testing_utils.BaseDNNRegressorTrainTest.__init__( - self, _dnn_estimator_fn) - - -class DNNEstimatorWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest, - test.TestCase): - - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - dnn_testing_utils.BaseDNNWarmStartingTest.__init__( - self, _dnn_estimator_classifier_fn, _dnn_estimator_fn) - - -class DNNEstimatorIntegrationTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def _test_complete_flow( - self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, - label_dimension, 
batch_size): - feature_columns = [ - feature_column.numeric_column('x', shape=(input_dimension,))] - est = dnn.DNNEstimator( - head=head_lib.regression_head(label_dimension=label_dimension), - hidden_units=(2, 2), - feature_columns=feature_columns, - model_dir=self._model_dir) - - # TRAIN - num_steps = 10 - est.train(train_input_fn, steps=num_steps) - - # EVALUTE - scores = est.evaluate(eval_input_fn) - self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) - self.assertIn('loss', six.iterkeys(scores)) - - # PREDICT - predictions = np.array([ - x[prediction_keys.PredictionKeys.PREDICTIONS] - for x in est.predict(predict_input_fn) - ]) - self.assertAllEqual((batch_size, label_dimension), predictions.shape) - - # EXPORT - feature_spec = feature_column.make_parse_example_spec(feature_columns) - serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( - feature_spec) - export_dir = est.export_savedmodel(tempfile.mkdtemp(), - serving_input_receiver_fn) - self.assertTrue(gfile.Exists(export_dir)) - - def test_numpy_input_fn(self): - """Tests complete flow with numpy_input_fn.""" - label_dimension = 2 - batch_size = 10 - data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) - data = data.reshape(batch_size, label_dimension) - # learn y = x - train_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=data, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - eval_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=data, - batch_size=batch_size, - shuffle=False) - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - batch_size=batch_size, - shuffle=False) - - self._test_complete_flow( - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - predict_input_fn=predict_input_fn, - input_dimension=label_dimension, - label_dimension=label_dimension, - batch_size=batch_size) - - -if __name__ == '__main__': - test.main() diff --git 
a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py index 40a91175b7..29cbdeeb76 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,425 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Deep Neural Network estimators with layer annotations.""" +"""dnn_with_layer_annotations python module. + +Importing from tensorflow.python.estimator +is unsupported and will soon break! 
+""" +# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import from __future__ import absolute_import from __future__ import division from __future__ import print_function -import contextlib -import pickle - -from google.protobuf.any_pb2 import Any - -from tensorflow.python.estimator import estimator -from tensorflow.python.estimator.canned import dnn -from tensorflow.python.feature_column import feature_column as feature_column_lib -from tensorflow.python.framework import ops -from tensorflow.python.ops import nn -from tensorflow.python.ops.losses import losses -from tensorflow.python.saved_model import utils as saved_model_utils - - -class LayerAnnotationsCollectionNames(object): - """Names for the collections containing the annotations.""" - - UNPROCESSED_FEATURES = 'layer_annotations/unprocessed_features' - PROCESSED_FEATURES = 'layer_annotatons/processed_features' - FEATURE_COLUMNS = 'layer_annotations/feature_columns' - - @classmethod - def keys(cls, collection_name): - return '%s/keys' % collection_name - - @classmethod - def values(cls, collection_name): - return '%s/values' % collection_name - - -def serialize_feature_column(feature_column): - if isinstance(feature_column, feature_column_lib._EmbeddingColumn): # pylint: disable=protected-access - # We can't pickle nested functions, and we don't need the value of - # layer_creator in most cases anyway, so just discard its value. 
- args = feature_column._asdict() - args['layer_creator'] = None - temp = type(feature_column)(**args) - return pickle.dumps(temp) - return pickle.dumps(feature_column) - - -def _to_any_wrapped_tensor_info(tensor): - """Converts a `Tensor` to a `TensorInfo` wrapped in a proto `Any`.""" - any_buf = Any() - tensor_info = saved_model_utils.build_tensor_info(tensor) - any_buf.Pack(tensor_info) - return any_buf - - -def make_input_layer_with_layer_annotations(original_input_layer): - """Make an input_layer replacement function that adds layer annotations.""" - - def input_layer_with_layer_annotations(features, - feature_columns, - weight_collections=None, - trainable=True, - cols_to_vars=None, - scope=None, - cols_to_output_tensors=None, - from_template=False): - """Returns a dense `Tensor` as input layer based on given `feature_columns`. - - Generally a single example in training data is described with - FeatureColumns. - At the first layer of the model, this column oriented data should be - converted - to a single `Tensor`. - - This is like tf.feature_column.input_layer, except with added - Integrated-Gradient annotations. - - Args: - features: A mapping from key to tensors. `_FeatureColumn`s look up via - these keys. For example `numeric_column('price')` will look at 'price' - key in this dict. Values can be a `SparseTensor` or a `Tensor` depends - on corresponding `_FeatureColumn`. - feature_columns: An iterable containing the FeatureColumns to use as - inputs to your model. All items should be instances of classes derived - from `_DenseColumn` such as `numeric_column`, `embedding_column`, - `bucketized_column`, `indicator_column`. If you have categorical - features, you can wrap them with an `embedding_column` or - `indicator_column`. - weight_collections: A list of collection names to which the Variable will - be added. Note that variables will also be added to collections - `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. 
- trainable: If `True` also add the variable to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - cols_to_vars: If not `None`, must be a dictionary that will be filled with - a mapping from `_FeatureColumn` to list of `Variable`s. For example, - after the call, we might have cols_to_vars = {_EmbeddingColumn( - categorical_column=_HashedCategoricalColumn( key='sparse_feature', - hash_bucket_size=5, dtype=tf.string), dimension=10): [